import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras import datasets
my_string = "i will be an ai expert"
print(my_string.split('a', maxsplit=1))
print(my_string.split('a',maxsplit=-1))
print(my_string.rsplit('a',maxsplit=1))
print(my_string.find('l'))
print(my_string.find('1'))
print(my_string.rfind('l'))
print(my_string.find('be', 6, 11)) #6번과 11번 사이에서 찾아줌.
print(my_string.index('l'))
#print(my_string.index('1')) --> 에러남
x = ' abc '
print( x.strip() )
x = '***a**bc**'
print( x.strip('*') )
x = '***!a*!*bc**'
print( x.strip('*!') ) #strip은 strip이란 메소드 안에 들어간 어떠한 char로든 다 앞뒤로 빼줌.
y = 'I would like to learn deep learning'
print( y.replace('deep learning', 'ML') )
print( y.replace('python', 'ML') )
z = 'Abcdef3'
print( z.isnumeric() )
print( z.isalpha() )
print( z.isalnum() )
z1 = 'Abc def1'
print( z1.istitle() )
print( z1.title() )
print( z1.swapcase() )
x1 = ' '
print( x1.isspace() )
['i will be ', 'n ai expert'] ['i will be ', 'n ', 'i expert'] ['i will be an ', 'i expert'] 4 -1 5 7 4 abc a**bc a*!*bc I would like to learn ML I would like to learn deep learning False False True False Abc Def1 aBC DEF1 True
a = 3 + 5j
print( type(a) )
print( a.conjugate() )
print( a.imag ) #이건 method가 아니라 attribute임.
print( a.real )
a = 3
print( type(a) )
print( a.conjugate() )
print( a.denominator ) #분모
print( a.numerator ) #분자
<class 'complex'> (3-5j) 5.0 3.0 <class 'int'> 3 1 3
a = 3
b = 4
c = 5
print( a, b, c )
# print( somethign you want to print, sep(default는 space), end(default는 newline))
print( a, b, c, sep=' ', end='\n' )
print( a, b, c, sep=' ' )
print( a, b, c, sep='ccc' )
print( a, b, c, sep='\t' )
print( a, b, c, sep='\n' )
print("let's learn about end")
print( c, end=' ')
print( b, end='\t')
3 4 5 3 4 5 3 4 5 3ccc4ccc5 3 4 5 3 4 5 let's learn about end 5 4
#format method
name = 'Gyurin'
location = 'Daejeon'
ID = 2019103877
print( "My name is", name, "in", location, "with ID", ID )
print( "My name is {} in {} with ID {}".format(name, location, ID) )
print( "My name is {2} in {1} with ID {0}".format(name, location, ID) )
print( f"My name is {name} in {location} with ID {ID}" ) #f-string
My name is Gyurin in Daejeon with ID 2019103877 My name is Gyurin in Daejeon with ID 2019103877 My name is 2019103877 in Daejeon with ID Gyurin My name is Gyurin in Daejeon with ID 2019103877
a = 100/3
print( a )
print( f"The length is {a} meter" )
print( f"The length is {a:.2f} meter" ) #2는 precision
print( f"The length is {a:10.2f} meter" ) #10은 minimum length to represent the number, 33.33의 5개 제외하고 5blank 생김.
print( f"The length is {a:15.2f} meter" )
33.333333333333336 The length is 33.333333333333336 meter The length is 33.33 meter The length is 33.33 meter The length is 33.33 meter
x=5
y=7
z=x/y
print("{} is divided by {} is {}".format(x, y, z)) #format
print("{} is divided by {} is {:.3}".format(x, y, z))
print(f"{x} is divided by {y} is {z:.3}") #f-string
print("%d is divided by %d is %.3f" %(x,y,z)) #old style, placeholder
5 is divided by 7 is 0.7142857142857143 5 is divided by 7 is 0.714 5 is divided by 7 is 0.714 5 is divided by 7 is 0.714
student = [100, 99, 45, 'james', 87.5]
print(type(student))
print(student[0])
print(student[-1])
print(student[0:4:2])
<class 'list'> 100 87.5 [100, 45]
my_list = [10, 20, 30]
your_list = [40, 50, 60]
print(my_list+your_list)
print(my_list * 3)
print(len(my_list))
[10, 20, 30, 40, 50, 60] [10, 20, 30, 10, 20, 30, 10, 20, 30] 3
#built-in function : print, type, len, input
#list는 mutable, string은 immutable
my_string = "hello"
print(id(my_string))
my_string = "hello2"
print(id(my_string))
140503901504304 140502511168240
a=3
print(id(a))
a=5
print(id(a))
94731100547648 94731100547712
my_list = [10,20,30]
my_list.append(40) #이게 return하는 건 없다.
your_list = my_list.append(40)
print(my_list)
print(your_list)
my_string = 'hello'
your_string = my_string.upper()
print(your_string)
print(my_string)
[10, 20, 30, 40, 40] None HELLO hello
import pandas as pd
a = pd.DataFrame( [[50, 60, 70], [10,20,30]])
a
| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | 50 | 60 | 70 |
| 1 | 10 | 20 | 30 |
type(a)
pandas.core.frame.DataFrame
a.sort_values(by=0, inplace=True)
a
| 0 | 1 | 2 | |
|---|---|---|---|
| 1 | 10 | 20 | 30 |
| 0 | 50 | 60 | 70 |
b = a.sort_values(by=0, inplace=False)
a
| 0 | 1 | 2 | |
|---|---|---|---|
| 1 | 10 | 20 | 30 |
| 0 | 50 | 60 | 70 |
my_list = [10,20,30]
my_list.insert(1,100) #근데 append가 훨씬 많이 쓰인다.
my_list
[10, 100, 20, 30]
my_list = [10,20,30]
my_list.pop()
30
my_list
[10, 20]
my_list=[10,20,30]
my_list.pop(0)
10
my_list
[20, 30]
my_list=[10,20,30,20,30,40,20,50,80]
my_list.remove(20)
my_list
[10, 30, 20, 30, 40, 20, 50, 80]
my_list.clear()
my_list
[]
a = [10,20,30]
b=a
a
[10, 20, 30]
b
[10, 20, 30]
a[0]=100
a
[100, 20, 30]
b
[100, 20, 30]
id(a)
140502510505008
id(b)
140502510505008
a=[10,20,30]
b=a.copy()
a
[10, 20, 30]
b
[10, 20, 30]
a[0]=100
a
[100, 20, 30]
b
[10, 20, 30]
id(a)
140502510628128
id(b)
140502511285344
my_list=[10,20,30,10]
my_list.count(10)
2
my_list.index(10)
0
your_list = [20,40,50]
my_list.extend(your_list)
my_list
[10, 20, 30, 10, 20, 40, 50]
my_list.sort()
my_list
[10, 10, 20, 20, 30, 40, 50]
my_list.sort(reverse=True)
my_list
[50, 40, 30, 20, 20, 10, 10]
my_list = [10,30,20,40,50]
my_list.reverse()
my_list
[50, 40, 20, 30, 10]
my_list = [10,30,20,40,50]
my_list = my_list[::-1]
my_list
[50, 40, 20, 30, 10]
my_list = [10,30,20,40,50]
sum(my_list)
150
max(my_list)
50
min(my_list)
10
a=[10,20,30]
b=[20,50,60]
c=a+b
c
[10, 20, 30, 20, 50, 60]
import numpy as np
a = np.array([10,20,30])
b = np.array([40,50,60])
c = a+b
c
array([50, 70, 90])
c = a*b
c
array([ 400, 1000, 1800])
a = a.reshape((3,1))
a
array([[10],
[20],
[30]])
b = b.reshape(1,3)
b
array([[40, 50, 60]])
np.matmul(a,b)
array([[ 400, 500, 600],
[ 800, 1000, 1200],
[1200, 1500, 1800]])
import numpy as np
array1=np.array([10,20,30])
array2=np.array([40,50,60])
print(type(array1))
print(type(array2))
array_sum = array1+array2
print(array_sum)
array_mul = array1 * array2
print(array_mul)
array1_mul = array1 * 3
print(array1_mul)
print(array1_mul * array1)
array3 = np.array([40,50,60,70])
# print(array1 * array3) #앞의 것 렬, 뒤의 것 행 개수 안맞아서 오류.
<class 'numpy.ndarray'> <class 'numpy.ndarray'> [50 70 90] [ 400 1000 1800] [30 60 90] [ 300 1200 2700]
array1 = np.array([ [10,20,30], [1,2,3] ] )
print(array1.shape)
array2 = np.array( [ [10,1], [20,2], [30,3] ] )
print(array2.shape)
array_mul = np.matmul(array1, array2)
print(array_mul)
(2, 3) (3, 2) [[1400 140] [ 140 14]]
my_tuple = (10,20,30)
print(type(my_tuple))
my_list = [10,20,30]
print(type(my_list))
my_tupe = 10,20,30
print(my_tuple)
print(type(my_tuple))
another_tuple = tuple(my_list)
print(type(another_tuple))
another_list = list(another_tuple)
print(type(another_list))
print(len(my_tuple))
<class 'tuple'> <class 'list'> (10, 20, 30) <class 'tuple'> <class 'tuple'> <class 'list'> 3
a = [] #initialize
print(type(a))
b=()
print(type(b))
<class 'list'> <class 'tuple'>
#indexing and slicing
my_tuple = (10,20,30,40)
print( my_tuple[0] )
print( my_tuple[1] )
print( my_tuple[-1] )
print( my_tuple[0:3] )
print( my_tuple[ : ] )
print( my_tuple[ :3] )
print( my_tuple[ : : -1] )
10 20 40 (10, 20, 30) (10, 20, 30, 40) (10, 20, 30) (40, 30, 20, 10)
Tuple is immutable (unchangeable)
# my_tuple[0] = 100 #하면 에러남.
Operation +와*
my_tuple = (10,20,30,40)
your_tuple = (1,2,3,4)
print( my_tuple + your_tuple )
print( my_tuple * 3 )
print( my_tuple.count(10) )
print( my_tuple.index(10) )
(10, 20, 30, 40, 1, 2, 3, 4) (10, 20, 30, 40, 10, 20, 30, 40, 10, 20, 30, 40) 1 0
Nested list, Nested tuple
nested_list = [[1,2, [3, 7] ], [4,5], 6]
print( type(nested_list) )
print( nested_list[0] )
print( nested_list[0][0] )
<class 'list'> [1, 2, [3, 7]] 1
nested_tuple = ( (1,2,(3,6)), (4,5), 6)
print( type(nested_tuple) )
print( nested_tuple[0] )
print( nested_tuple[0][0] )
print( nested_tuple[0][2][1] )
<class 'tuple'> (1, 2, (3, 6)) 1 6
array1 = np.array( [[1,2,3], [10,20,30]])
print(array1)
print(array1.shape)
print(array1[0])
print(array1[0][0])
[[ 1 2 3] [10 20 30]] (2, 3) [1 2 3] 1
my_list = [ [1,2,3], (4,5), 6, 7 ]
print( type(my_list) )
print( my_list[0] )
print( type(my_list[0]) )
print( my_list[1] )
print( type(my_list[1]) )
print( my_list[2] )
print( type(my_list[2]) )
<class 'list'> [1, 2, 3] <class 'list'> (4, 5) <class 'tuple'> 6 <class 'int'>
Set is unordered collections of unique elements. no indexing, no slicing.
my_set = {1,2,3,4,5}
print( type(my_set) )
my_set = {1,2,3,4,5, 1,2,1,2,1,1,1,1,1}
print( my_set )
print( type(my_set) )
<class 'set'>
{1, 2, 3, 4, 5}
<class 'set'>
my_set = { }
print( type(my_set) )
<class 'dict'>
my_dic = {'eng':100, 'math':50, 'science':20}
print( type(my_dic) )
<class 'dict'>
my_set = { 1,2,3,4,5, 'hello', (1,2,3), 1,2,3,4 }
print( len(my_set) )
7
Indexing, slicing, operations +와* are not available for set.
set1 = {1,2,3}
set2 = {10,20,30}
set1.add(4) #set is muttable
print( set1 )
set1.add(1)
print( set1 )
set1.update( set2 ) #update란 method 안에는 element가 아니라 set을 써야함.
print( set1 )
set1.update( 'god' ) #update에 string 넣으면 g, o, d가 split 되어서 set1에 들어감.
print( set1 )
{1, 2, 3, 4}
{1, 2, 3, 4}
{1, 2, 3, 4, 20, 10, 30}
{1, 2, 3, 4, 10, 'd', 'g', 20, 30, 'o'}
immutable : Tuple, Number, String
mutable : List, Set
set은 index 없고, +, * 안된다.
set은 add(), update()란 메소드 있다. a.add(2), a.update({6})
remove(), discard()도 있다. a.remove(4), a.discard({2,3})
remove는 없는 거 뺄려고 하면 에러남, discard는 에러 안남.
tuple은 두가지 메소드만 : index(), count()
list는 11 method : append, extend, inset 등
a = [1,2,3,4]
a.append([5,6,7])
a
[1, 2, 3, 4, [5, 6, 7]]
union (합집합)
intersection (교집합)
difference (차집합)
a = {1,2,3}
b = {3,4,5}
c = a.union(b)
print(a)
print(c)
{1, 2, 3}
{1, 2, 3, 4, 5}
a = {1,2,3}
b = {3,4,5}
# c = a.union_update(b) # update() = union_update()라서 이런 건 없는거다 그냥.
print(a)
print(c)
{1, 2, 3}
{1, 2, 3, 4, 5}
a = {1,2,3}
b = {3,4,5}
c = a.intersection(b)
print(a)
print(c)
{1, 2, 3}
{3}
a = {1,2,3}
b = {3,4,5}
c = a.intersection_update(b)
print(a)
print(c)
{3}
None
a = {1,2,3}
b = {3,4,5}
c = a.difference(b)
print(a)
print(c)
{1, 2, 3}
{1, 2}
a = {1,2,3}
b = {3,4,5}
c = a.difference_update(b)
print(a)
print(c)
{1, 2}
None
subset (부분집합)
issubset()이라는 메소드
a = {1,2,3}
b = {1}
b.issubset(a)
True
set은 index()와 count()가 없다. count는 {89}.issubset(a)로 대체 가능하니까!
Dictionary : a value is retrieved by a key. it is unordered.
a = {key:value, key:value}
key는 list, tuple, string 등 아무거나 다 됨.
a = { 'a' : 1, 'b' : 2 }
print( a['a'] )
print( a['b'] )
1 2
can't do slicing for a dictionary.
+, *도 안된다. 이거는 list, tuple만 되는 애들임.
dict의 메소드 :
keys()
values()
items()
get()
clear()
dica = { 'a' : 1, 'b' : 2 }
print( dica.keys() )
print( dica.values() )
print( dica.items() )
dict_keys(['a', 'b'])
dict_values([1, 2])
dict_items([('a', 1), ('b', 2)])
dica = { 'a' : 1, 'b' : 2 }
for i in a.values():
print(i)
for i in a.items():
print(i)
1
2
('a', 1)
('b', 2)
dica = { 'a' : 1, 'b' : 2 }
print( dica['a'] )
# print( dica['d'] ) #에러
print( dica.get('a') )
print( dica.get('d') )
1 1 None
dica = { 'a' : 1, 'b' : 2 }
dica.clear()
print( dica )
{}
dica = { 'a' : 1, 'b' : 2 }
dicb = dica
print( dica )
print( dicb )
dica.clear()
print( dica )
print( dicb )
{'a': 1, 'b': 2}
{'a': 1, 'b': 2}
{}
{}
이제부터 simple ai model 배울거다.
tensorflow 는 library이고 여러 package 있다. 그중 한 패키지가 keras임.
from tensorflow import keras
keras.datasets.imdb.load_data()
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz 17465344/17464789 [==============================] - 0s 0us/step 17473536/17464789 [==============================] - 0s 0us/step
((array([list([1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 22665, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 21631, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 31050, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]),
list([1, 194, 1153, 194, 8255, 78, 228, 5, 6, 1463, 4369, 5012, 134, 26, 4, 715, 8, 118, 1634, 14, 394, 20, 13, 119, 954, 189, 102, 5, 207, 110, 3103, 21, 14, 69, 188, 8, 30, 23, 7, 4, 249, 126, 93, 4, 114, 9, 2300, 1523, 5, 647, 4, 116, 9, 35, 8163, 4, 229, 9, 340, 1322, 4, 118, 9, 4, 130, 4901, 19, 4, 1002, 5, 89, 29, 952, 46, 37, 4, 455, 9, 45, 43, 38, 1543, 1905, 398, 4, 1649, 26, 6853, 5, 163, 11, 3215, 10156, 4, 1153, 9, 194, 775, 7, 8255, 11596, 349, 2637, 148, 605, 15358, 8003, 15, 123, 125, 68, 23141, 6853, 15, 349, 165, 4362, 98, 5, 4, 228, 9, 43, 36893, 1157, 15, 299, 120, 5, 120, 174, 11, 220, 175, 136, 50, 9, 4373, 228, 8255, 5, 25249, 656, 245, 2350, 5, 4, 9837, 131, 152, 491, 18, 46151, 32, 7464, 1212, 14, 9, 6, 371, 78, 22, 625, 64, 1382, 9, 8, 168, 145, 23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95]),
list([1, 14, 47, 8, 30, 31, 7, 4, 249, 108, 7, 4, 5974, 54, 61, 369, 13, 71, 149, 14, 22, 112, 4, 2401, 311, 12, 16, 3711, 33, 75, 43, 1829, 296, 4, 86, 320, 35, 534, 19, 263, 4821, 1301, 4, 1873, 33, 89, 78, 12, 66, 16, 4, 360, 7, 4, 58, 316, 334, 11, 4, 1716, 43, 645, 662, 8, 257, 85, 1200, 42, 1228, 2578, 83, 68, 3912, 15, 36, 165, 1539, 278, 36, 69, 44076, 780, 8, 106, 14, 6905, 1338, 18, 6, 22, 12, 215, 28, 610, 40, 6, 87, 326, 23, 2300, 21, 23, 22, 12, 272, 40, 57, 31, 11, 4, 22, 47, 6, 2307, 51, 9, 170, 23, 595, 116, 595, 1352, 13, 191, 79, 638, 89, 51428, 14, 9, 8, 106, 607, 624, 35, 534, 6, 227, 7, 129, 113]),
...,
list([1, 11, 6, 230, 245, 6401, 9, 6, 1225, 446, 86527, 45, 2174, 84, 8322, 4007, 21, 4, 912, 84, 14532, 325, 725, 134, 15271, 1715, 84, 5, 36, 28, 57, 1099, 21, 8, 140, 8, 703, 5, 11656, 84, 56, 18, 1644, 14, 9, 31, 7, 4, 9406, 1209, 2295, 26094, 1008, 18, 6, 20, 207, 110, 563, 12, 8, 2901, 17793, 8, 97, 6, 20, 53, 4767, 74, 4, 460, 364, 1273, 29, 270, 11, 960, 108, 45, 40, 29, 2961, 395, 11, 6, 4065, 500, 7, 14492, 89, 364, 70, 29, 140, 4, 64, 4780, 11, 4, 2678, 26, 178, 4, 529, 443, 17793, 5, 27, 710, 117, 74936, 8123, 165, 47, 84, 37, 131, 818, 14, 595, 10, 10, 61, 1242, 1209, 10, 10, 288, 2260, 1702, 34, 2901, 17793, 4, 65, 496, 4, 231, 7, 790, 5, 6, 320, 234, 2766, 234, 1119, 1574, 7, 496, 4, 139, 929, 2901, 17793, 7750, 5, 4241, 18, 4, 8497, 13164, 250, 11, 1818, 7561, 4, 4217, 5408, 747, 1115, 372, 1890, 1006, 541, 9303, 7, 4, 59, 11027, 4, 3586, 22459]),
list([1, 1446, 7079, 69, 72, 3305, 13, 610, 930, 8, 12, 582, 23, 5, 16, 484, 685, 54, 349, 11, 4120, 2959, 45, 58, 1466, 13, 197, 12, 16, 43, 23, 21469, 5, 62, 30, 145, 402, 11, 4131, 51, 575, 32, 61, 369, 71, 66, 770, 12, 1054, 75, 100, 2198, 8, 4, 105, 37, 69, 147, 712, 75, 3543, 44, 257, 390, 5, 69, 263, 514, 105, 50, 286, 1814, 23, 4, 123, 13, 161, 40, 5, 421, 4, 116, 16, 897, 13, 40691, 40, 319, 5872, 112, 6700, 11, 4803, 121, 25, 70, 3468, 4, 719, 3798, 13, 18, 31, 62, 40, 8, 7200, 4, 29455, 7, 14, 123, 5, 942, 25, 8, 721, 12, 145, 5, 202, 12, 160, 580, 202, 12, 6, 52, 58, 11418, 92, 401, 728, 12, 39, 14, 251, 8, 15, 251, 5, 21213, 12, 38, 84, 80, 124, 12, 9, 23]),
list([1, 17, 6, 194, 337, 7, 4, 204, 22, 45, 254, 8, 106, 14, 123, 4, 12815, 270, 14437, 5, 16923, 12255, 732, 2098, 101, 405, 39, 14, 1034, 4, 1310, 9, 115, 50, 305, 12, 47, 4, 168, 5, 235, 7, 38, 111, 699, 102, 7, 4, 4039, 9245, 9, 24, 6, 78, 1099, 17, 2345, 16553, 21, 27, 9685, 6139, 5, 29043, 1603, 92, 1183, 4, 1310, 7, 4, 204, 42, 97, 90, 35, 221, 109, 29, 127, 27, 118, 8, 97, 12, 157, 21, 6789, 85010, 9, 6, 66, 78, 1099, 4, 631, 1191, 5, 2642, 272, 191, 1070, 6, 7585, 8, 2197, 70907, 10755, 544, 5, 383, 1271, 848, 1468, 12183, 497, 16876, 8, 1597, 8778, 19280, 21, 60, 27, 239, 9, 43, 8368, 209, 405, 10, 10, 12, 764, 40, 4, 248, 20, 12, 16, 5, 174, 1791, 72, 7, 51, 6, 1739, 22, 4, 204, 131, 9])],
dtype=object), array([1, 0, 0, ..., 0, 1, 0])),
(array([list([1, 591, 202, 14, 31, 6, 717, 10, 10, 18142, 10698, 5, 4, 360, 7, 4, 177, 5760, 394, 354, 4, 123, 9, 1035, 1035, 1035, 10, 10, 13, 92, 124, 89, 488, 7944, 100, 28, 1668, 14, 31, 23, 27, 7479, 29, 220, 468, 8, 124, 14, 286, 170, 8, 157, 46, 5, 27, 239, 16, 179, 15387, 38, 32, 25, 7944, 451, 202, 14, 6, 717]),
list([1, 14, 22, 3443, 6, 176, 7, 5063, 88, 12, 2679, 23, 1310, 5, 109, 943, 4, 114, 9, 55, 606, 5, 111, 7, 4, 139, 193, 273, 23, 4, 172, 270, 11, 7216, 10626, 4, 8463, 2801, 109, 1603, 21, 4, 22, 3861, 8, 6, 1193, 1330, 10, 10, 4, 105, 987, 35, 841, 16873, 19, 861, 1074, 5, 1987, 17975, 45, 55, 221, 15, 670, 5304, 526, 14, 1069, 4, 405, 5, 2438, 7, 27, 85, 108, 131, 4, 5045, 5304, 3884, 405, 9, 3523, 133, 5, 50, 13, 104, 51, 66, 166, 14, 22, 157, 9, 4, 530, 239, 34, 8463, 2801, 45, 407, 31, 7, 41, 3778, 105, 21, 59, 299, 12, 38, 950, 5, 4521, 15, 45, 629, 488, 2733, 127, 6, 52, 292, 17, 4, 6936, 185, 132, 1988, 5304, 1799, 488, 2693, 47, 6, 392, 173, 4, 21686, 4378, 270, 2352, 4, 1500, 7, 4, 65, 55, 73, 11, 346, 14, 20, 9, 6, 976, 2078, 7, 5293, 861, 12746, 5, 4182, 30, 3127, 23651, 56, 4, 841, 5, 990, 692, 8, 4, 1669, 398, 229, 10, 10, 13, 2822, 670, 5304, 14, 9, 31, 7, 27, 111, 108, 15, 2033, 19, 7836, 1429, 875, 551, 14, 22, 9, 1193, 21, 45, 4829, 5, 45, 252, 8, 12508, 6, 565, 921, 3639, 39, 4, 529, 48, 25, 181, 8, 67, 35, 1732, 22, 49, 238, 60, 135, 1162, 14, 9, 290, 4, 58, 10, 10, 472, 45, 55, 878, 8, 169, 11, 374, 5687, 25, 203, 28, 8, 818, 12, 125, 4, 3077]),
list([1, 111, 748, 4368, 1133, 33782, 24563, 4, 87, 1551, 1262, 7, 31, 318, 9459, 7, 4, 498, 5076, 748, 63, 29, 5161, 220, 686, 10941, 5, 17, 12, 575, 220, 2507, 17, 6, 185, 132, 24563, 16, 53, 928, 11, 51278, 74, 4, 438, 21, 27, 10044, 589, 8, 22, 107, 20123, 19550, 997, 1638, 8, 35, 2076, 9019, 11, 22, 231, 54, 29, 1706, 29, 100, 18995, 2425, 34, 12998, 8738, 48078, 5, 19353, 98, 31, 2122, 33, 6, 58, 14, 3808, 1638, 8, 4, 365, 7, 2789, 3761, 356, 346, 4, 27608, 1060, 63, 29, 93, 11, 5421, 11, 15236, 33, 6, 58, 54, 1270, 431, 748, 7, 32, 2580, 16, 11, 94, 19469, 10, 10, 4, 993, 45222, 7, 4, 1766, 2634, 2164, 24563, 8, 847, 8, 1450, 121, 31, 7, 27, 86, 2663, 10760, 16, 6, 465, 993, 2006, 30995, 573, 17, 61862, 42, 4, 17345, 37, 473, 6, 711, 6, 8869, 7, 328, 212, 70, 30, 258, 11, 220, 32, 7, 108, 21, 133, 12, 9, 55, 465, 849, 3711, 53, 33, 2071, 1969, 37, 70, 1144, 4, 5940, 1409, 74, 476, 37, 62, 91, 1329, 169, 4, 1330, 10104, 146, 655, 2212, 5, 258, 12, 184, 10104, 546, 5, 849, 10333, 7, 4, 22, 1436, 18, 631, 1386, 797, 7, 4, 8712, 71, 348, 425, 4320, 1061, 19, 10288, 5, 12141, 11, 661, 8, 339, 17863, 4, 2455, 11434, 7, 4, 1962, 10, 10, 263, 787, 9, 270, 11, 6, 9466, 4, 61862, 48414, 121, 4, 5437, 26, 4434, 19, 68, 1372, 5, 28, 446, 6, 318, 7149, 8, 67, 51, 36, 70, 81, 8, 4392, 2294, 36, 1197, 8, 68411, 25399, 18, 6, 711, 4, 9909, 26, 10296, 1125, 11, 14, 636, 720, 12, 426, 28, 77, 776, 8, 97, 38, 111, 7489, 6175, 168, 1239, 5189, 137, 25399, 18, 27, 173, 9, 2399, 17, 6, 12397, 428, 14657, 232, 11, 4, 8014, 37, 272, 40, 2708, 247, 30, 656, 6, 13182, 54, 25399, 3292, 98, 6, 2840, 40, 558, 37, 6093, 98, 4, 17345, 1197, 15, 14, 9, 57, 4893, 5, 4659, 6, 275, 711, 7937, 25399, 3292, 98, 6, 31036, 10, 10, 6639, 19, 14, 10241, 267, 162, 711, 37, 5900, 752, 98, 4, 17345, 2378, 90, 19, 6, 73284, 7, 36744, 1810, 77553, 4, 4770, 3183, 930, 8, 508, 90, 4, 1317, 8, 4, 48414, 17, 15454, 3965, 1853, 4, 1494, 8, 4468, 189, 4, 31036, 6287, 5774, 4, 4770, 5, 95, 271, 23, 6, 7742, 6063, 21627, 5437, 33, 1526, 6, 425, 3155, 33697, 4535, 1636, 7, 4, 4669, 11966, 469, 4, 4552, 54, 4, 150, 5664, 17345, 280, 53, 68411, 25399, 18, 339, 29, 1978, 27, 7885, 5, 17303, 68, 1830, 19, 6571, 14605, 4, 1515, 7, 263, 65, 2132, 34, 6, 5680, 7489, 43, 159, 29, 9, 4706, 9, 387, 73, 195, 584, 10, 10, 1069, 4, 58, 810, 54, 14, 6078, 117, 22, 16, 93, 5, 1069, 4, 192, 15, 12, 16, 93, 34, 6, 1766, 28228, 33, 4, 5673, 7, 15, 18760, 9252, 3286, 325, 12, 62, 30, 776, 8, 67, 14, 17, 6, 12214, 44, 148, 687, 24563, 203, 42, 203, 24, 28, 69, 32157, 6676, 11, 330, 54, 29, 93, 61862, 21, 845, 14148, 27, 1099, 7, 819, 4, 22, 1407, 17, 6, 14967, 787, 7, 2460, 19569, 61862, 100, 30, 4, 3737, 3617, 3169, 2321, 42, 1898, 11, 4, 3814, 42, 101, 704, 7, 101, 999, 15, 1625, 94, 2926, 180, 5, 9, 9101, 34, 15205, 45, 6, 1429, 22, 60, 6, 1220, 31, 11, 94, 6408, 96, 21, 94, 749, 9, 57, 975]),
...,
list([1, 13, 1408, 15, 8, 135, 14, 9, 35, 32, 46, 394, 20, 62, 30, 5093, 21, 45, 184, 78, 4, 1492, 910, 769, 2290, 2515, 395, 4257, 5, 1454, 11, 119, 16946, 89, 1036, 4, 116, 218, 78, 21, 407, 100, 30, 128, 262, 15, 7, 185, 2280, 284, 1842, 60664, 37, 315, 4, 226, 20, 272, 2942, 40, 29, 152, 60, 181, 8, 30, 50, 553, 362, 80, 119, 12, 21, 846, 5518]),
list([1, 11, 119, 241, 9, 4, 840, 20, 12, 468, 15, 94, 3684, 562, 791, 39, 4, 86, 107, 8, 97, 14, 31, 33, 4, 2960, 7, 743, 46, 1028, 9, 3531, 5, 4, 768, 47, 8, 79, 90, 145, 164, 162, 50, 6, 501, 119, 7, 9, 4, 78, 232, 15, 16, 224, 11, 4, 333, 20, 4, 985, 200, 5, 28739, 5, 9, 1861, 8, 79, 357, 4, 20, 47, 220, 57, 206, 139, 11, 12, 5, 55, 117, 212, 13, 1276, 92, 124, 51, 45, 1188, 71, 536, 13, 520, 14, 20, 6, 2302, 7, 470]),
list([1, 6, 52, 7465, 430, 22, 9, 220, 2594, 8, 28, 24357, 519, 3227, 6, 769, 15, 47, 6, 3482, 4067, 8, 114, 5, 33, 222, 31, 55, 184, 704, 5586, 18020, 19, 346, 3153, 5, 6, 364, 350, 4, 184, 5586, 9, 133, 1810, 11, 5417, 13226, 21, 4, 7298, 42657, 570, 50, 2005, 2643, 9, 6, 1249, 17, 6, 25194, 27803, 21, 17, 6, 1211, 232, 1138, 2249, 29, 266, 56, 96, 346, 194, 308, 9, 194, 21, 29, 218, 1078, 19, 4, 78, 173, 7, 27, 20067, 5698, 3406, 718, 21264, 9, 6, 6907, 17, 210, 5, 3281, 5677, 47, 77, 395, 14, 172, 173, 18, 2740, 2931, 4517, 82, 127, 27, 173, 11, 6, 392, 217, 21, 50, 9, 57, 65, 12, 14274, 53, 40, 35, 390, 7, 11, 4, 3567, 7, 4, 314, 74, 6, 792, 22, 16261, 19, 714, 727, 5205, 382, 4, 91, 6533, 439, 19, 14, 20, 9, 1441, 5805, 1118, 4, 756, 25, 124, 4, 31, 12, 16, 93, 804, 34, 2005, 2643])],
dtype=object),
array([0, 1, 1, ..., 0, 0, 0])))
keras 안엔 여러 모듈 있고, 그 모듈 중 하나가 datasets고, 걔도 여러 모듈 있고, ....
data는 train data와 test data로 나뉨.
train data만 model을 가지고 있다.
test data는 shouldn't be involve model, never!!
그래서 우리는 train data만 생각할거다.
train data는 data, label로 나눠짐.
data는 문제, label은 정답.
imdb는 internet movies data base
data는 예를들어 "this movie is `~~" 의 text data 그런거고
label은 positve(1), negative(0). 그런거임.
(train_data, train_labels), (test_data, test_labels) = keras.datasets.imdb.load_data()
# tuple 형태임.
attribute(특징), ( ) 불필요
method(메소드), ( ) 필요
train_data.shape #이건 attribute임.
(25000,)
train_labels.shape
(25000,)
test_data.shape
(25000,)
test_labels.shape
(25000,)
train_labels
array([1, 0, 0, ..., 0, 1, 0])
train_data
array([list([1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 22665, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 21631, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 31050, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]),
list([1, 194, 1153, 194, 8255, 78, 228, 5, 6, 1463, 4369, 5012, 134, 26, 4, 715, 8, 118, 1634, 14, 394, 20, 13, 119, 954, 189, 102, 5, 207, 110, 3103, 21, 14, 69, 188, 8, 30, 23, 7, 4, 249, 126, 93, 4, 114, 9, 2300, 1523, 5, 647, 4, 116, 9, 35, 8163, 4, 229, 9, 340, 1322, 4, 118, 9, 4, 130, 4901, 19, 4, 1002, 5, 89, 29, 952, 46, 37, 4, 455, 9, 45, 43, 38, 1543, 1905, 398, 4, 1649, 26, 6853, 5, 163, 11, 3215, 10156, 4, 1153, 9, 194, 775, 7, 8255, 11596, 349, 2637, 148, 605, 15358, 8003, 15, 123, 125, 68, 23141, 6853, 15, 349, 165, 4362, 98, 5, 4, 228, 9, 43, 36893, 1157, 15, 299, 120, 5, 120, 174, 11, 220, 175, 136, 50, 9, 4373, 228, 8255, 5, 25249, 656, 245, 2350, 5, 4, 9837, 131, 152, 491, 18, 46151, 32, 7464, 1212, 14, 9, 6, 371, 78, 22, 625, 64, 1382, 9, 8, 168, 145, 23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95]),
list([1, 14, 47, 8, 30, 31, 7, 4, 249, 108, 7, 4, 5974, 54, 61, 369, 13, 71, 149, 14, 22, 112, 4, 2401, 311, 12, 16, 3711, 33, 75, 43, 1829, 296, 4, 86, 320, 35, 534, 19, 263, 4821, 1301, 4, 1873, 33, 89, 78, 12, 66, 16, 4, 360, 7, 4, 58, 316, 334, 11, 4, 1716, 43, 645, 662, 8, 257, 85, 1200, 42, 1228, 2578, 83, 68, 3912, 15, 36, 165, 1539, 278, 36, 69, 44076, 780, 8, 106, 14, 6905, 1338, 18, 6, 22, 12, 215, 28, 610, 40, 6, 87, 326, 23, 2300, 21, 23, 22, 12, 272, 40, 57, 31, 11, 4, 22, 47, 6, 2307, 51, 9, 170, 23, 595, 116, 595, 1352, 13, 191, 79, 638, 89, 51428, 14, 9, 8, 106, 607, 624, 35, 534, 6, 227, 7, 129, 113]),
...,
list([1, 11, 6, 230, 245, 6401, 9, 6, 1225, 446, 86527, 45, 2174, 84, 8322, 4007, 21, 4, 912, 84, 14532, 325, 725, 134, 15271, 1715, 84, 5, 36, 28, 57, 1099, 21, 8, 140, 8, 703, 5, 11656, 84, 56, 18, 1644, 14, 9, 31, 7, 4, 9406, 1209, 2295, 26094, 1008, 18, 6, 20, 207, 110, 563, 12, 8, 2901, 17793, 8, 97, 6, 20, 53, 4767, 74, 4, 460, 364, 1273, 29, 270, 11, 960, 108, 45, 40, 29, 2961, 395, 11, 6, 4065, 500, 7, 14492, 89, 364, 70, 29, 140, 4, 64, 4780, 11, 4, 2678, 26, 178, 4, 529, 443, 17793, 5, 27, 710, 117, 74936, 8123, 165, 47, 84, 37, 131, 818, 14, 595, 10, 10, 61, 1242, 1209, 10, 10, 288, 2260, 1702, 34, 2901, 17793, 4, 65, 496, 4, 231, 7, 790, 5, 6, 320, 234, 2766, 234, 1119, 1574, 7, 496, 4, 139, 929, 2901, 17793, 7750, 5, 4241, 18, 4, 8497, 13164, 250, 11, 1818, 7561, 4, 4217, 5408, 747, 1115, 372, 1890, 1006, 541, 9303, 7, 4, 59, 11027, 4, 3586, 22459]),
list([1, 1446, 7079, 69, 72, 3305, 13, 610, 930, 8, 12, 582, 23, 5, 16, 484, 685, 54, 349, 11, 4120, 2959, 45, 58, 1466, 13, 197, 12, 16, 43, 23, 21469, 5, 62, 30, 145, 402, 11, 4131, 51, 575, 32, 61, 369, 71, 66, 770, 12, 1054, 75, 100, 2198, 8, 4, 105, 37, 69, 147, 712, 75, 3543, 44, 257, 390, 5, 69, 263, 514, 105, 50, 286, 1814, 23, 4, 123, 13, 161, 40, 5, 421, 4, 116, 16, 897, 13, 40691, 40, 319, 5872, 112, 6700, 11, 4803, 121, 25, 70, 3468, 4, 719, 3798, 13, 18, 31, 62, 40, 8, 7200, 4, 29455, 7, 14, 123, 5, 942, 25, 8, 721, 12, 145, 5, 202, 12, 160, 580, 202, 12, 6, 52, 58, 11418, 92, 401, 728, 12, 39, 14, 251, 8, 15, 251, 5, 21213, 12, 38, 84, 80, 124, 12, 9, 23]),
list([1, 17, 6, 194, 337, 7, 4, 204, 22, 45, 254, 8, 106, 14, 123, 4, 12815, 270, 14437, 5, 16923, 12255, 732, 2098, 101, 405, 39, 14, 1034, 4, 1310, 9, 115, 50, 305, 12, 47, 4, 168, 5, 235, 7, 38, 111, 699, 102, 7, 4, 4039, 9245, 9, 24, 6, 78, 1099, 17, 2345, 16553, 21, 27, 9685, 6139, 5, 29043, 1603, 92, 1183, 4, 1310, 7, 4, 204, 42, 97, 90, 35, 221, 109, 29, 127, 27, 118, 8, 97, 12, 157, 21, 6789, 85010, 9, 6, 66, 78, 1099, 4, 631, 1191, 5, 2642, 272, 191, 1070, 6, 7585, 8, 2197, 70907, 10755, 544, 5, 383, 1271, 848, 1468, 12183, 497, 16876, 8, 1597, 8778, 19280, 21, 60, 27, 239, 9, 43, 8368, 209, 405, 10, 10, 12, 764, 40, 4, 248, 20, 12, 16, 5, 174, 1791, 72, 7, 51, 6, 1739, 22, 4, 204, 131, 9])],
dtype=object)
print( train_data[0] )
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 22665, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 21631, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 31050, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
type( train_data[0] )
list
word_index = keras.datasets.imdb.get_word_index()
# print(word_index)
#dict라서 key : value 꼴로 나타남. {'fawn': 34701, 'tsukino': 52006, 'nunnery': 52007, 등등
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json 1646592/1641221 [==============================] - 0s 0us/step 1654784/1641221 [==============================] - 0s 0us/step
len(word_index)
88584
88584는 corpus임.
50000개 중 25000는 train data, 25000는 test data
그 50000개의 data는 88584개의 단어종류를 포함한다.
type( word_index )
dict
len(train_data[0]) #first data의 개수
218
len(train_data[1])
189
len(train_data[2])
141
data -> AI model -> label
data의 len는 다 다른데, 256로 고정시키겠다.
long comment, short comment 가 있지만, 256 words로 고정시킴.
길면 자르고, 짧으면 padding(00000...)로 채움. post면 뒤에, pre면 앞에.
train_data = keras.preprocessing.sequence.pad_sequences(train_data, maxlen = 256, padding = 'post')
test_data = keras.preprocessing.sequence.pad_sequences(test_data, maxlen = 256, padding = 'post')
model = keras.Sequential()
model.add(keras.layers.Embedding(len(word_index)+3, 32, input_length = 256))
model.add(keras.layers.GlobalAveragePooling1D( ))
model.add(keras.layers.Dense(16, activation = 'relu'))
model.add(keras.layers.Dense(16, activation = 'sigmoid'))
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= embedding (Embedding) (None, 256, 32) 2834784 _________________________________________________________________ global_average_pooling1d (Gl (None, 32) 0 _________________________________________________________________ dense (Dense) (None, 16) 528 _________________________________________________________________ dense_1 (Dense) (None, 16) 272 ================================================================= Total params: 2,835,584 Trainable params: 2,835,584 Non-trainable params: 0 _________________________________________________________________
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# model.fit ( train_data, train_labels, batch_size = 512, epochs =10 )
# model.evaluate(test_data, test_labels)
python statement
if, for, while
function
AI, ML, DL을 먼저 배워보겠다.
basic learning.
data가 있는데, data엔 Question과 Answer가 있다. 둘 다 준비되어 있다.
이걸 Model에 넣음.
x1과 x2는 각각 feature임.
예, x1는 height(cm), x2는 weight(kg).
병 있으면 O, 없으면 X.
O, X가 answer, 즉 label임.
x1, x2는 question, 즉 feature data임.
그래서 우리는 다른 키, 체중 가진 경우를 예측하는 거임.
no label.
feature만 있다.
just plot the data.
group A, group B 정도로만 나눌 수 있음.
이게 supervised learning을 보조함.
no label
위 세가지 경우와 완전히 다름.
Go 바둑 게임.
바둑알을 여기다 두면 이길 확률.
이길 확률 높은 걸 계속계속 배움.
agent , reward, state, environment 있음.
Yann LeCun이라는 사람, God of AI
Geoffey Heton은 King of AI
cake 전체는 computer AI brain.
supervised learning은 icing 같다.
reinforcement learning은 cherry 같다.
내부는 unsupervised learning.
deep fake는 unsupervised learning임.
clustering
많은 feature data(weight, height, Blood, Heart, ...) 가 있으면,
1000가지 feature 있으면,
dimension reduction을 해야함.
그게 unsupervised learning.
supervised learning에는 두가지 있다.
Input : feature data.
Output : label.
둘 다 주어진다.
그리고 supervised learning은
regression problem, classification problem으로 나뉨.
regression : output(label)이 continuous values.
classification : output(label)이 discrete values. categorized value.
data를 model에 넣음.
model엔 weight 있음.
weight는 100,000, 1million, 1billion 등 많은 값이 있따.
model을 몇 개 정해놓음. Hypothesis(가설).
그리고 learning해서,
weight(model의 optional parameters)를 찾음.
another new data를 trained model에 넣음.
prediction(; estimation) 함.
data in supervised learning : feature, label
: form이 많다.
(1) table data or time-series data :1D
(2) Image : 1frame. 2D or 3D : 많은 channel 있다. CT, MR, US 등의 이미지는 DICom image임.
(3) video : 3D or 4D : 30frame/sec 등으로 표현.
: classification problem.
(1) binary label : malignat of benigh : 0 or 1.
(2) multi-label : 0,`,1,2,.... :
kaggle.com엔 AI computation이 있다.
prize 탈 수 있다.
6 elements로 구성된 1 list가 label 결정. 즉, one-hot Encoding.
data를 training, etst로 바꾸는 비율은 보통
8:2, 7:3, or 9:1
test data는 꼭 isolated 되어있어야 함.
ML alrogithm에는 여러가지가 있다.
Linear regression,
: very basic. only for regression problem. predict some values.
Logistic regression, ,.........
regression problem
feature data로서 size 가 있다. only 1 feature.
output, label은 달러임.
일단은 걍 feature가 하나만 있다고 가정하자.
빨간 점이 whole data임.
modeling해서 hypothesis를 세우자. linear pattern이라고.
y=wx+b로. w는 slope, b는 intersection.
두개의 파라미터가 있는 것이다.
every single value는 x,y가 있는거니까 그걸 y=wx+b에 대입.
2 parameter니까 2 equation만 있으면 된다.
근데 여러 가지 해봐가지구 w, b 찾는거다.
boston housing dataset
14개의 features 있다.
y = w1x1 + .... + w13x13 + b로 식 세울 수 있다. (linear regression이면!)
먼저, from tensorflow.keras import datasets를 한다.
from tensorflow.keras import datasets
(train_data, train_label), (test_data, test_label) = datasets.boston_housing.load_data()
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz 57344/57026 [==============================] - 0s 0us/step 65536/57026 [==================================] - 0s 0us/step
train_data에는 13 features 있음.
type(train_data)
numpy.ndarray
train_data.shape
(404, 13)
train_label.shape
(404,)
test_data.shape
(102, 13)
test_label.shape
(102,)
13 feature data가 404개임.
train_data[0]
train_data.shape 하면 (404, 13) 나옴. 2 dimension임.
train_label.shape하면 (404,) 나옴. 즉, 1 dimension임.
(404,1)는 2D로 간주함.
train_data[0] 하면 array([ ?,?,?, ? ..., ?])임.
train_data[0]는 13 elements의 list이고, train_data[403]까지 있음.
test_data.shape 하면 (102,13)가 나옴.
test_label.shape 하면 (102,)가 나옴.
print(train_data)
[[1.23247e+00 0.00000e+00 8.14000e+00 ... 2.10000e+01 3.96900e+02 1.87200e+01] [2.17700e-02 8.25000e+01 2.03000e+00 ... 1.47000e+01 3.95380e+02 3.11000e+00] [4.89822e+00 0.00000e+00 1.81000e+01 ... 2.02000e+01 3.75520e+02 3.26000e+00] ... [3.46600e-02 3.50000e+01 6.06000e+00 ... 1.69000e+01 3.62250e+02 7.83000e+00] [2.14918e+00 0.00000e+00 1.95800e+01 ... 1.47000e+01 2.61950e+02 1.57900e+01] [1.43900e-02 6.00000e+01 2.93000e+00 ... 1.56000e+01 3.76700e+02 4.38000e+00]]
print(train_label)
[15.2 42.3 50. 21.1 17.7 18.5 11.3 15.6 15.6 14.4 12.1 17.9 23.1 19.9 15.7 8.8 50. 22.5 24.1 27.5 10.9 30.8 32.9 24. 18.5 13.3 22.9 34.7 16.6 17.5 22.3 16.1 14.9 23.1 34.9 25. 13.9 13.1 20.4 20. 15.2 24.7 22.2 16.7 12.7 15.6 18.4 21. 30.1 15.1 18.7 9.6 31.5 24.8 19.1 22. 14.5 11. 32. 29.4 20.3 24.4 14.6 19.5 14.1 14.3 15.6 10.5 6.3 19.3 19.3 13.4 36.4 17.8 13.5 16.5 8.3 14.3 16. 13.4 28.6 43.5 20.2 22. 23. 20.7 12.5 48.5 14.6 13.4 23.7 50. 21.7 39.8 38.7 22.2 34.9 22.5 31.1 28.7 46. 41.7 21. 26.6 15. 24.4 13.3 21.2 11.7 21.7 19.4 50. 22.8 19.7 24.7 36.2 14.2 18.9 18.3 20.6 24.6 18.2 8.7 44. 10.4 13.2 21.2 37. 30.7 22.9 20. 19.3 31.7 32. 23.1 18.8 10.9 50. 19.6 5. 14.4 19.8 13.8 19.6 23.9 24.5 25. 19.9 17.2 24.6 13.5 26.6 21.4 11.9 22.6 19.6 8.5 23.7 23.1 22.4 20.5 23.6 18.4 35.2 23.1 27.9 20.6 23.7 28. 13.6 27.1 23.6 20.6 18.2 21.7 17.1 8.4 25.3 13.8 22.2 18.4 20.7 31.6 30.5 20.3 8.8 19.2 19.4 23.1 23. 14.8 48.8 22.6 33.4 21.1 13.6 32.2 13.1 23.4 18.9 23.9 11.8 23.3 22.8 19.6 16.7 13.4 22.2 20.4 21.8 26.4 14.9 24.1 23.8 12.3 29.1 21. 19.5 23.3 23.8 17.8 11.5 21.7 19.9 25. 33.4 28.5 21.4 24.3 27.5 33.1 16.2 23.3 48.3 22.9 22.8 13.1 12.7 22.6 15. 15.3 10.5 24. 18.5 21.7 19.5 33.2 23.2 5. 19.1 12.7 22.3 10.2 13.9 16.3 17. 20.1 29.9 17.2 37.3 45.4 17.8 23.2 29. 22. 18. 17.4 34.6 20.1 25. 15.6 24.8 28.2 21.2 21.4 23.8 31. 26.2 17.4 37.9 17.5 20. 8.3 23.9 8.4 13.8 7.2 11.7 17.1 21.6 50. 16.1 20.4 20.6 21.4 20.6 36.5 8.5 24.8 10.8 21.9 17.3 18.9 36.2 14.9 18.2 33.3 21.8 19.7 31.6 24.8 19.4 22.8 7.5 44.8 16.8 18.7 50. 50. 19.5 20.1 50. 17.2 20.8 19.3 41.3 20.4 20.5 13.8 16.5 23.9 20.6 31.5 23.3 16.8 14. 33.8 36.1 12.8 18.3 18.7 19.1 29. 30.1 50. 50. 22. 11.9 37.6 50. 22.7 20.8 23.5 27.9 50. 19.3 23.9 22.6 15.2 21.7 19.2 43.8 20.3 33.2 19.9 22.5 32.7 22. 17.1 19. 15. 16.1 25.1 23.7 28.7 37.2 22.6 16.4 25. 29.8 22.1 17.4 18.1 30.3 17.5 24.7 12.6 26.5 28.7 13.3 10.4 24.4 23. 20. 17.8 7. 11.8 24.4 13.8 19.4 25.2 19.4 19.4 29.1]
train_label
array([15.2, 42.3, 50. , 21.1, 17.7, 18.5, 11.3, 15.6, 15.6, 14.4, 12.1,
17.9, 23.1, 19.9, 15.7, 8.8, 50. , 22.5, 24.1, 27.5, 10.9, 30.8,
32.9, 24. , 18.5, 13.3, 22.9, 34.7, 16.6, 17.5, 22.3, 16.1, 14.9,
23.1, 34.9, 25. , 13.9, 13.1, 20.4, 20. , 15.2, 24.7, 22.2, 16.7,
12.7, 15.6, 18.4, 21. , 30.1, 15.1, 18.7, 9.6, 31.5, 24.8, 19.1,
22. , 14.5, 11. , 32. , 29.4, 20.3, 24.4, 14.6, 19.5, 14.1, 14.3,
15.6, 10.5, 6.3, 19.3, 19.3, 13.4, 36.4, 17.8, 13.5, 16.5, 8.3,
14.3, 16. , 13.4, 28.6, 43.5, 20.2, 22. , 23. , 20.7, 12.5, 48.5,
14.6, 13.4, 23.7, 50. , 21.7, 39.8, 38.7, 22.2, 34.9, 22.5, 31.1,
28.7, 46. , 41.7, 21. , 26.6, 15. , 24.4, 13.3, 21.2, 11.7, 21.7,
19.4, 50. , 22.8, 19.7, 24.7, 36.2, 14.2, 18.9, 18.3, 20.6, 24.6,
18.2, 8.7, 44. , 10.4, 13.2, 21.2, 37. , 30.7, 22.9, 20. , 19.3,
31.7, 32. , 23.1, 18.8, 10.9, 50. , 19.6, 5. , 14.4, 19.8, 13.8,
19.6, 23.9, 24.5, 25. , 19.9, 17.2, 24.6, 13.5, 26.6, 21.4, 11.9,
22.6, 19.6, 8.5, 23.7, 23.1, 22.4, 20.5, 23.6, 18.4, 35.2, 23.1,
27.9, 20.6, 23.7, 28. , 13.6, 27.1, 23.6, 20.6, 18.2, 21.7, 17.1,
8.4, 25.3, 13.8, 22.2, 18.4, 20.7, 31.6, 30.5, 20.3, 8.8, 19.2,
19.4, 23.1, 23. , 14.8, 48.8, 22.6, 33.4, 21.1, 13.6, 32.2, 13.1,
23.4, 18.9, 23.9, 11.8, 23.3, 22.8, 19.6, 16.7, 13.4, 22.2, 20.4,
21.8, 26.4, 14.9, 24.1, 23.8, 12.3, 29.1, 21. , 19.5, 23.3, 23.8,
17.8, 11.5, 21.7, 19.9, 25. , 33.4, 28.5, 21.4, 24.3, 27.5, 33.1,
16.2, 23.3, 48.3, 22.9, 22.8, 13.1, 12.7, 22.6, 15. , 15.3, 10.5,
24. , 18.5, 21.7, 19.5, 33.2, 23.2, 5. , 19.1, 12.7, 22.3, 10.2,
13.9, 16.3, 17. , 20.1, 29.9, 17.2, 37.3, 45.4, 17.8, 23.2, 29. ,
22. , 18. , 17.4, 34.6, 20.1, 25. , 15.6, 24.8, 28.2, 21.2, 21.4,
23.8, 31. , 26.2, 17.4, 37.9, 17.5, 20. , 8.3, 23.9, 8.4, 13.8,
7.2, 11.7, 17.1, 21.6, 50. , 16.1, 20.4, 20.6, 21.4, 20.6, 36.5,
8.5, 24.8, 10.8, 21.9, 17.3, 18.9, 36.2, 14.9, 18.2, 33.3, 21.8,
19.7, 31.6, 24.8, 19.4, 22.8, 7.5, 44.8, 16.8, 18.7, 50. , 50. ,
19.5, 20.1, 50. , 17.2, 20.8, 19.3, 41.3, 20.4, 20.5, 13.8, 16.5,
23.9, 20.6, 31.5, 23.3, 16.8, 14. , 33.8, 36.1, 12.8, 18.3, 18.7,
19.1, 29. , 30.1, 50. , 50. , 22. , 11.9, 37.6, 50. , 22.7, 20.8,
23.5, 27.9, 50. , 19.3, 23.9, 22.6, 15.2, 21.7, 19.2, 43.8, 20.3,
33.2, 19.9, 22.5, 32.7, 22. , 17.1, 19. , 15. , 16.1, 25.1, 23.7,
28.7, 37.2, 22.6, 16.4, 25. , 29.8, 22.1, 17.4, 18.1, 30.3, 17.5,
24.7, 12.6, 26.5, 28.7, 13.3, 10.4, 24.4, 23. , 20. , 17.8, 7. ,
11.8, 24.4, 13.8, 19.4, 25.2, 19.4, 19.4, 29.1])
print(train_label[0])
15.2
# import LinearRegression Class
from sklearn.linear_model import LinearRegression
# Instance (object) creation
model = LinearRegression()
🔥의 세 줄만 하면 다 되는거임....!!!!!!!!!!!!!!!!!!!!!!!!!!
from sklearn.linear_model import LinearRegression 🔥
model = LinearRegression( ) 🔥
(c.f. 대문자로 시작하면 class임. model은 instance(; object)고, Linear Regression은 class임.)
(c.f. a = 1 에서 a는 instance임. a.하면 여러 attribute와 method 나옴.)
(c.f. model. 하면 여러 attribute와 method 나옴.)
model.fit(train_data, train_label)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
model.fit( train_data, train_label ) 🔥 # fit은 train a model을 의미. w1x1+ ... + w13x13 쓰고 training까지 다 끝남.
predicted_results = model.predict( test_data )
print( predicted_results ) # 하면 예상된 price가 list로 쫘르르르륵 나옴. 9.6
[ 9.69267239 21.05859431 20.94145461 34.25223414 25.95902598 20.56741038 28.15310405 25.07113862 20.30519458 22.82489511 20.29027213 18.07938917 16.46141024 35.31166595 19.41369959 19.34727116 24.58140904 21.01346252 19.99079236 23.79286427 12.20780045 17.14218992 22.51468752 12.92570511 21.18070615 23.94073547 33.46176314 24.2631555 13.35213096 20.71939252 23.45082103 19.1486881 36.09743765 23.47113531 19.03141995 5.92425733 14.02090469 22.96425377 16.01731242 27.09134249 22.06997029 28.61755389 17.75679772 34.75579014 31.29455712 24.73234165 31.12655323 18.09268992 22.62638989 24.21208909 30.5854317 18.71860574 10.50163617 13.73779292 34.4022841 27.53140317 18.08151522 40.05101982 37.63271513 24.64021422 25.73823434 20.64369987 20.33514699 21.38393124 24.63485497 23.53071805 17.42141466 26.76911736 3.83782148 11.97063636 24.2602519 23.67191799 23.66639182 8.4403269 28.52690962 20.94071229 20.44489658 24.77678169 33.77299778 7.02038331 24.55379254 36.73923493 16.09878612 18.14663987 20.48899554 18.7928142 22.45683561 26.19154506 23.36585592 28.89583791 17.48243942 16.05642916 26.69532678 28.1393996 35.04164989 20.05308052 36.25269116 38.51475052 25.13506119 41.49062194 34.65500235 25.33950895]
print( test_label ) # 하면 true value가 나옴. 요게 진짜 value. 7.2
[ 7.2 18.8 19. 27. 22.2 24.5 31.2 22.9 20.5 23.2 18.6 14.5 17.8 50. 20.8 24.3 24.2 19.8 19.1 22.7 12. 10.2 20. 18.5 20.9 23. 27.5 30.1 9.5 22. 21.2 14.1 33.1 23.4 20.1 7.4 15.4 23.8 20.1 24.5 33. 28.4 14.1 46.7 32.5 29.6 28.4 19.8 20.2 25. 35.4 20.3 9.7 14.5 34.9 26.6 7.2 50. 32.4 21.6 29.8 13.1 27.5 21.2 23.1 21.9 13. 23.2 8.1 5.6 21.7 29.6 19.6 7. 26.4 18.9 20.9 28.1 35.4 10.2 24.3 43.1 17.6 15.4 16.2 27.1 21.4 21.5 22.4 25. 16.6 18.6 22. 42.8 35.1 21.5 36. 21.9 24.1 50. 26.7 25. ]
abs( predicted_results - test_label ) #하면 error value가 나옴. 2.4
array([ 2.49267239, 2.25859431, 1.94145461, 7.25223414, 3.75902598,
3.93258962, 3.04689595, 2.17113862, 0.19480542, 0.37510489,
1.69027213, 3.57938917, 1.33858976, 14.68833405, 1.38630041,
4.95272884, 0.38140904, 1.21346252, 0.89079236, 1.09286427,
0.20780045, 6.94218992, 2.51468752, 5.57429489, 0.28070615,
0.94073547, 5.96176314, 5.8368445 , 3.85213096, 1.28060748,
2.25082103, 5.0486881 , 2.99743765, 0.07113531, 1.06858005,
1.47574267, 1.37909531, 0.83574623, 4.08268758, 2.59134249,
10.93002971, 0.21755389, 3.65679772, 11.94420986, 1.20544288,
4.86765835, 2.72655323, 1.70731008, 2.42638989, 0.78791091,
4.8145683 , 1.58139426, 0.80163617, 0.76220708, 0.4977159 ,
0.93140317, 10.88151522, 9.94898018, 5.23271513, 3.04021422,
4.06176566, 7.54369987, 7.16485301, 0.18393124, 1.53485497,
1.63071805, 4.42141466, 3.56911736, 4.26217852, 6.37063636,
2.5602519 , 5.92808201, 4.06639182, 1.4403269 , 2.12690962,
2.04071229, 0.45510342, 3.32321831, 1.62700222, 3.17961669,
0.25379254, 6.36076507, 1.50121388, 2.74663987, 4.28899554,
8.3071858 , 1.05683561, 4.69154506, 0.96585592, 3.89583791,
0.88243942, 2.54357084, 4.69532678, 14.6606004 , 0.05835011,
1.44691948, 0.25269116, 16.61475052, 1.03506119, 8.50937806,
7.95500235, 0.33950895])
average_error = sum( abs( predicted_results - test_label ) / len(test_label) ) #len(test_label)은 102임.
print( average_error ) #하면 평균 에러. 즉, 3.4 thousand dollars.
3.464185812406718
print( f"평균 에러는 {average_error * 1000} $ 이다." )
평균 에러는 3464.185812406718 $ 이다.
print( f"평균 에러는 {(average_error*1000):.2f} $ 이다." ) #하면 소숫점 아래 두자리 수까지.
평균 에러는 3464.19 $ 이다.
#1 data
from tensorflow.keras import datasets
(train_data, train_label), (test_data, test_label) = datasets.boston_housing.load_data()
#1.1 data analysis
type(train_data)
numpy.ndarray
train_data.shape # 2D
(404, 13)
train_label.shape # 1D이다. c.f.) (404, 1)은 2D로 간주함.
(404,)
train_data[0]
array([ 1.23247, 0. , 8.14 , 0. , 0.538 , 6.142 ,
91.7 , 3.9769 , 4. , 307. , 21. , 396.9 ,
18.72 ])
train_data[403] #404부터는 에러남.
array([1.4390e-02, 6.0000e+01, 2.9300e+00, 0.0000e+00, 4.0100e-01,
6.6040e+00, 1.8800e+01, 6.2196e+00, 1.0000e+00, 2.6500e+02,
1.5600e+01, 3.7670e+02, 4.3800e+00])
test_data.shape
(102, 13)
test_label.shape
(102,)
print(train_data)
[[1.23247e+00 0.00000e+00 8.14000e+00 ... 2.10000e+01 3.96900e+02 1.87200e+01] [2.17700e-02 8.25000e+01 2.03000e+00 ... 1.47000e+01 3.95380e+02 3.11000e+00] [4.89822e+00 0.00000e+00 1.81000e+01 ... 2.02000e+01 3.75520e+02 3.26000e+00] ... [3.46600e-02 3.50000e+01 6.06000e+00 ... 1.69000e+01 3.62250e+02 7.83000e+00] [2.14918e+00 0.00000e+00 1.95800e+01 ... 1.47000e+01 2.61950e+02 1.57900e+01] [1.43900e-02 6.00000e+01 2.93000e+00 ... 1.56000e+01 3.76700e+02 4.38000e+00]]
print(train_data[0])
[ 1.23247 0. 8.14 0. 0.538 6.142 91.7 3.9769 4. 307. 21. 396.9 18.72 ]
print(train_label)
[15.2 42.3 50. 21.1 17.7 18.5 11.3 15.6 15.6 14.4 12.1 17.9 23.1 19.9 15.7 8.8 50. 22.5 24.1 27.5 10.9 30.8 32.9 24. 18.5 13.3 22.9 34.7 16.6 17.5 22.3 16.1 14.9 23.1 34.9 25. 13.9 13.1 20.4 20. 15.2 24.7 22.2 16.7 12.7 15.6 18.4 21. 30.1 15.1 18.7 9.6 31.5 24.8 19.1 22. 14.5 11. 32. 29.4 20.3 24.4 14.6 19.5 14.1 14.3 15.6 10.5 6.3 19.3 19.3 13.4 36.4 17.8 13.5 16.5 8.3 14.3 16. 13.4 28.6 43.5 20.2 22. 23. 20.7 12.5 48.5 14.6 13.4 23.7 50. 21.7 39.8 38.7 22.2 34.9 22.5 31.1 28.7 46. 41.7 21. 26.6 15. 24.4 13.3 21.2 11.7 21.7 19.4 50. 22.8 19.7 24.7 36.2 14.2 18.9 18.3 20.6 24.6 18.2 8.7 44. 10.4 13.2 21.2 37. 30.7 22.9 20. 19.3 31.7 32. 23.1 18.8 10.9 50. 19.6 5. 14.4 19.8 13.8 19.6 23.9 24.5 25. 19.9 17.2 24.6 13.5 26.6 21.4 11.9 22.6 19.6 8.5 23.7 23.1 22.4 20.5 23.6 18.4 35.2 23.1 27.9 20.6 23.7 28. 13.6 27.1 23.6 20.6 18.2 21.7 17.1 8.4 25.3 13.8 22.2 18.4 20.7 31.6 30.5 20.3 8.8 19.2 19.4 23.1 23. 14.8 48.8 22.6 33.4 21.1 13.6 32.2 13.1 23.4 18.9 23.9 11.8 23.3 22.8 19.6 16.7 13.4 22.2 20.4 21.8 26.4 14.9 24.1 23.8 12.3 29.1 21. 19.5 23.3 23.8 17.8 11.5 21.7 19.9 25. 33.4 28.5 21.4 24.3 27.5 33.1 16.2 23.3 48.3 22.9 22.8 13.1 12.7 22.6 15. 15.3 10.5 24. 18.5 21.7 19.5 33.2 23.2 5. 19.1 12.7 22.3 10.2 13.9 16.3 17. 20.1 29.9 17.2 37.3 45.4 17.8 23.2 29. 22. 18. 17.4 34.6 20.1 25. 15.6 24.8 28.2 21.2 21.4 23.8 31. 26.2 17.4 37.9 17.5 20. 8.3 23.9 8.4 13.8 7.2 11.7 17.1 21.6 50. 16.1 20.4 20.6 21.4 20.6 36.5 8.5 24.8 10.8 21.9 17.3 18.9 36.2 14.9 18.2 33.3 21.8 19.7 31.6 24.8 19.4 22.8 7.5 44.8 16.8 18.7 50. 50. 19.5 20.1 50. 17.2 20.8 19.3 41.3 20.4 20.5 13.8 16.5 23.9 20.6 31.5 23.3 16.8 14. 33.8 36.1 12.8 18.3 18.7 19.1 29. 30.1 50. 50. 22. 11.9 37.6 50. 22.7 20.8 23.5 27.9 50. 19.3 23.9 22.6 15.2 21.7 19.2 43.8 20.3 33.2 19.9 22.5 32.7 22. 17.1 19. 15. 16.1 25.1 23.7 28.7 37.2 22.6 16.4 25. 29.8 22.1 17.4 18.1 30.3 17.5 24.7 12.6 26.5 28.7 13.3 10.4 24.4 23. 20. 17.8 7. 11.8 24.4 13.8 19.4 25.2 19.4 19.4 29.1]
print(train_label[0])
15.2
#2. Modeling
#2.1 Import LinearRegression Class
from sklearn.linear_model import LinearRegression #대문자로 시작하니까 class임.
#2.2 Instance (0bject) creation
model_lr = LinearRegression()
#LinearRegression는 class,
#model_lr은 instance(=object) 이다. instance는 attribute, method를 부를 수 있다. 쩜 찍어서.
#3. Training
model_lr.fit(train_data, train_label)
#fit은, 'train a model'
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
#4. Evaluation
prediction_lr = model_lr.predict(test_data)
print(prediction_lr)
[ 9.69267239 21.05859431 20.94145461 34.25223414 25.95902598 20.56741038 28.15310405 25.07113862 20.30519458 22.82489511 20.29027213 18.07938917 16.46141024 35.31166595 19.41369959 19.34727116 24.58140904 21.01346252 19.99079236 23.79286427 12.20780045 17.14218992 22.51468752 12.92570511 21.18070615 23.94073547 33.46176314 24.2631555 13.35213096 20.71939252 23.45082103 19.1486881 36.09743765 23.47113531 19.03141995 5.92425733 14.02090469 22.96425377 16.01731242 27.09134249 22.06997029 28.61755389 17.75679772 34.75579014 31.29455712 24.73234165 31.12655323 18.09268992 22.62638989 24.21208909 30.5854317 18.71860574 10.50163617 13.73779292 34.4022841 27.53140317 18.08151522 40.05101982 37.63271513 24.64021422 25.73823434 20.64369987 20.33514699 21.38393124 24.63485497 23.53071805 17.42141466 26.76911736 3.83782148 11.97063636 24.2602519 23.67191799 23.66639182 8.4403269 28.52690962 20.94071229 20.44489658 24.77678169 33.77299778 7.02038331 24.55379254 36.73923493 16.09878612 18.14663987 20.48899554 18.7928142 22.45683561 26.19154506 23.36585592 28.89583791 17.48243942 16.05642916 26.69532678 28.1393996 35.04164989 20.05308052 36.25269116 38.51475052 25.13506119 41.49062194 34.65500235 25.33950895]
print( test_label ) #얘는 정답
[ 7.2 18.8 19. 27. 22.2 24.5 31.2 22.9 20.5 23.2 18.6 14.5 17.8 50. 20.8 24.3 24.2 19.8 19.1 22.7 12. 10.2 20. 18.5 20.9 23. 27.5 30.1 9.5 22. 21.2 14.1 33.1 23.4 20.1 7.4 15.4 23.8 20.1 24.5 33. 28.4 14.1 46.7 32.5 29.6 28.4 19.8 20.2 25. 35.4 20.3 9.7 14.5 34.9 26.6 7.2 50. 32.4 21.6 29.8 13.1 27.5 21.2 23.1 21.9 13. 23.2 8.1 5.6 21.7 29.6 19.6 7. 26.4 18.9 20.9 28.1 35.4 10.2 24.3 43.1 17.6 15.4 16.2 27.1 21.4 21.5 22.4 25. 16.6 18.6 22. 42.8 35.1 21.5 36. 21.9 24.1 50. 26.7 25. ]
sum(abs(prediction_lr - test_label)) / 102 #정답이랑 예상한거랑 비교해보쟈
3.4641858124067206
error = sum(abs(prediction_lr - test_label)) / len(test_label)
print(error)
3.4641858124067206
print( f"평균 에러는 {error*1000 :.2f}")
평균 에러는 3464.19
from tensorflow.keras import datasets
(train_data, train_label), (test_data, test_label) = datasets.boston_housing.load_data()
#train data에는,
#data는 가로 13 세로 404
#label(house price)는 가로 1 세로 404
train_data.shape
(404, 13)
train_label.shape
(404,)
test_data.shape
(102, 13)
test_label.shape
(102,)
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(train_data, train_label)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
model.coef_
#도구표시는 attribute임.
#y = w1x1 + w2x2 + ... + w13x13 + b에서 w1, w2, .., , w13 는 coefficient임.
array([-1.19997513e-01, 5.70003304e-02, 3.98379660e-03, 4.12698187e+00,
-2.05002963e+01, 3.38024903e+00, 7.56807584e-03, -1.71189793e+00,
3.34747537e-01, -1.17797225e-02, -9.02318039e-01, 8.71912756e-03,
-5.55842510e-01])
model.intercept_
#y = w1x1 + w2x2 + ... + w13x13 + b에서 b값
40.29367058057704
prediction = model.predict(test_data)
prediction
#이거는 예상한 값
array([ 9.69267239, 21.05859431, 20.94145461, 34.25223414, 25.95902598,
20.56741038, 28.15310405, 25.07113862, 20.30519458, 22.82489511,
20.29027213, 18.07938917, 16.46141024, 35.31166595, 19.41369959,
19.34727116, 24.58140904, 21.01346252, 19.99079236, 23.79286427,
12.20780045, 17.14218992, 22.51468752, 12.92570511, 21.18070615,
23.94073547, 33.46176314, 24.2631555 , 13.35213096, 20.71939252,
23.45082103, 19.1486881 , 36.09743765, 23.47113531, 19.03141995,
5.92425733, 14.02090469, 22.96425377, 16.01731242, 27.09134249,
22.06997029, 28.61755389, 17.75679772, 34.75579014, 31.29455712,
24.73234165, 31.12655323, 18.09268992, 22.62638989, 24.21208909,
30.5854317 , 18.71860574, 10.50163617, 13.73779292, 34.4022841 ,
27.53140317, 18.08151522, 40.05101982, 37.63271513, 24.64021422,
25.73823434, 20.64369987, 20.33514699, 21.38393124, 24.63485497,
23.53071805, 17.42141466, 26.76911736, 3.83782148, 11.97063636,
24.2602519 , 23.67191799, 23.66639182, 8.4403269 , 28.52690962,
20.94071229, 20.44489658, 24.77678169, 33.77299778, 7.02038331,
24.55379254, 36.73923493, 16.09878612, 18.14663987, 20.48899554,
18.7928142 , 22.45683561, 26.19154506, 23.36585592, 28.89583791,
17.48243942, 16.05642916, 26.69532678, 28.1393996 , 35.04164989,
20.05308052, 36.25269116, 38.51475052, 25.13506119, 41.49062194,
34.65500235, 25.33950895])
test_data.shape
(102, 13)
test_label
#이거는 true value
array([ 7.2, 18.8, 19. , 27. , 22.2, 24.5, 31.2, 22.9, 20.5, 23.2, 18.6,
14.5, 17.8, 50. , 20.8, 24.3, 24.2, 19.8, 19.1, 22.7, 12. , 10.2,
20. , 18.5, 20.9, 23. , 27.5, 30.1, 9.5, 22. , 21.2, 14.1, 33.1,
23.4, 20.1, 7.4, 15.4, 23.8, 20.1, 24.5, 33. , 28.4, 14.1, 46.7,
32.5, 29.6, 28.4, 19.8, 20.2, 25. , 35.4, 20.3, 9.7, 14.5, 34.9,
26.6, 7.2, 50. , 32.4, 21.6, 29.8, 13.1, 27.5, 21.2, 23.1, 21.9,
13. , 23.2, 8.1, 5.6, 21.7, 29.6, 19.6, 7. , 26.4, 18.9, 20.9,
28.1, 35.4, 10.2, 24.3, 43.1, 17.6, 15.4, 16.2, 27.1, 21.4, 21.5,
22.4, 25. , 16.6, 18.6, 22. , 42.8, 35.1, 21.5, 36. , 21.9, 24.1,
50. , 26.7, 25. ])
#prediction이랑 true value 비교해서 error 구해보자.
prediction - test_label
array([ 2.49267239, 2.25859431, 1.94145461, 7.25223414,
3.75902598, -3.93258962, -3.04689595, 2.17113862,
-0.19480542, -0.37510489, 1.69027213, 3.57938917,
-1.33858976, -14.68833405, -1.38630041, -4.95272884,
0.38140904, 1.21346252, 0.89079236, 1.09286427,
0.20780045, 6.94218992, 2.51468752, -5.57429489,
0.28070615, 0.94073547, 5.96176314, -5.8368445 ,
3.85213096, -1.28060748, 2.25082103, 5.0486881 ,
2.99743765, 0.07113531, -1.06858005, -1.47574267,
-1.37909531, -0.83574623, -4.08268758, 2.59134249,
-10.93002971, 0.21755389, 3.65679772, -11.94420986,
-1.20544288, -4.86765835, 2.72655323, -1.70731008,
2.42638989, -0.78791091, -4.8145683 , -1.58139426,
0.80163617, -0.76220708, -0.4977159 , 0.93140317,
10.88151522, -9.94898018, 5.23271513, 3.04021422,
-4.06176566, 7.54369987, -7.16485301, 0.18393124,
1.53485497, 1.63071805, 4.42141466, 3.56911736,
-4.26217852, 6.37063636, 2.5602519 , -5.92808201,
4.06639182, 1.4403269 , 2.12690962, 2.04071229,
-0.45510342, -3.32321831, -1.62700222, -3.17961669,
0.25379254, -6.36076507, -1.50121388, 2.74663987,
4.28899554, -8.3071858 , 1.05683561, 4.69154506,
0.96585592, 3.89583791, 0.88243942, -2.54357084,
4.69532678, -14.6606004 , -0.05835011, -1.44691948,
0.25269116, 16.61475052, 1.03506119, -8.50937806,
7.95500235, 0.33950895])
average_error = sum(abs(prediction-test_label)/len(test_label))
average_error
#이거는 MAE이다.
#MAE는 stands for mean absolute error
# c.f.) MSE는 stands for mean square error임.
# c.f.) RMSE는 stands for root mean square error임.
3.464185812406718
: regression
가로축 feature, 세로축 label
y=wx+b를 찾는거임.
w와 b 찾기
find optimal one. (minimum error인 것으로)
: for classification
가로축 module size, 세로축 양성인지, 음성인지.
세로축은 B(benigh) 아니면 M(malignant)임.
만약 이 문제를 linear regression으로 하면, y=ax+b꼴로 해야되는거니까, more data 점점 추가될 수록 그 기울기가 점점 작아질 것임.
x축이 0.5인 지점을 다 통과하는데, 모듈 추가됨에 따른 그래프가 기울기, y절편이 다 다름. (slope는 biased 됨, accuracy가 not high.)
Sigmoid function
y=시그모이드(wx+b)
에서 wx+b가 아무리 크거나 아무리 작아도
범위가 0~1 사이임.
y = w1x1 + w2x2 + ... + b 를 찾기 (w1, w2, ..를 찾는 방법은 얘기 안해줬음.)
얘는 fit을 쓰면 알아서 w1, w2, .... 를 찾아줬었음.
y = 시그모이드(w1x1 + w2x2 + ... + b) 를 찾기 => gradient descent method를 배울거임(?)
gradient dscent를 쓰면 알아서 w1, w1, .... 를 찾아줌.
Breast Cancer Dataset으로 연습해보자.
# 1. Data
from sklearn import datasets
cancer = datasets.load_breast_cancer()
# 1.1 Data Analysis
cancer
{'DESCR': '.. _breast_cancer_dataset:\n\nBreast cancer wisconsin (diagnostic) dataset\n--------------------------------------------\n\n**Data Set Characteristics:**\n\n :Number of Instances: 569\n\n :Number of Attributes: 30 numeric, predictive attributes and the class\n\n :Attribute Information:\n - radius (mean of distances from center to points on the perimeter)\n - texture (standard deviation of gray-scale values)\n - perimeter\n - area\n - smoothness (local variation in radius lengths)\n - compactness (perimeter^2 / area - 1.0)\n - concavity (severity of concave portions of the contour)\n - concave points (number of concave portions of the contour)\n - symmetry \n - fractal dimension ("coastline approximation" - 1)\n\n The mean, standard error, and "worst" or largest (mean of the three\n largest values) of these features were computed for each image,\n resulting in 30 features. For instance, field 3 is Mean Radius, field\n 13 is Radius SE, field 23 is Worst Radius.\n\n - class:\n - WDBC-Malignant\n - WDBC-Benign\n\n :Summary Statistics:\n\n ===================================== ====== ======\n Min Max\n ===================================== ====== ======\n radius (mean): 6.981 28.11\n texture (mean): 9.71 39.28\n perimeter (mean): 43.79 188.5\n area (mean): 143.5 2501.0\n smoothness (mean): 0.053 0.163\n compactness (mean): 0.019 0.345\n concavity (mean): 0.0 0.427\n concave points (mean): 0.0 0.201\n symmetry (mean): 0.106 0.304\n fractal dimension (mean): 0.05 0.097\n radius (standard error): 0.112 2.873\n texture (standard error): 0.36 4.885\n perimeter (standard error): 0.757 21.98\n area (standard error): 6.802 542.2\n smoothness (standard error): 0.002 0.031\n compactness (standard error): 0.002 0.135\n concavity (standard error): 0.0 0.396\n concave points (standard error): 0.0 0.053\n symmetry (standard error): 0.008 0.079\n fractal dimension (standard error): 0.001 0.03\n radius (worst): 7.93 36.04\n texture (worst): 12.02 49.54\n perimeter (worst): 50.41 251.2\n area (worst): 185.2 4254.0\n smoothness (worst): 0.071 0.223\n compactness (worst): 0.027 1.058\n concavity (worst): 0.0 1.252\n concave points (worst): 0.0 0.291\n symmetry (worst): 0.156 0.664\n fractal dimension (worst): 0.055 0.208\n ===================================== ====== ======\n\n :Missing Attribute Values: None\n\n :Class Distribution: 212 - Malignant, 357 - Benign\n\n :Creator: Dr. William H. Wolberg, W. Nick Street, Olvi L. Mangasarian\n\n :Donor: Nick Street\n\n :Date: November, 1995\n\nThis is a copy of UCI ML Breast Cancer Wisconsin (Diagnostic) datasets.\nhttps://goo.gl/U2Uwz2\n\nFeatures are computed from a digitized image of a fine needle\naspirate (FNA) of a breast mass. They describe\ncharacteristics of the cell nuclei present in the image.\n\nSeparating plane described above was obtained using\nMultisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree\nConstruction Via Linear Programming." Proceedings of the 4th\nMidwest Artificial Intelligence and Cognitive Science Society,\npp. 97-101, 1992], a classification method which uses linear\nprogramming to construct a decision tree. Relevant features\nwere selected using an exhaustive search in the space of 1-4\nfeatures and 1-3 separating planes.\n\nThe actual linear program used to obtain the separating plane\nin the 3-dimensional space is that described in:\n[K. P. Bennett and O. L. Mangasarian: "Robust Linear\nProgramming Discrimination of Two Linearly Inseparable Sets",\nOptimization Methods and Software 1, 1992, 23-34].\n\nThis database is also available through the UW CS ftp server:\n\nftp ftp.cs.wisc.edu\ncd math-prog/cpo-dataset/machine-learn/WDBC/\n\n.. topic:: References\n\n - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction \n for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on \n Electronic Imaging: Science and Technology, volume 1905, pages 861-870,\n San Jose, CA, 1993.\n - O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and \n prognosis via linear programming. Operations Research, 43(4), pages 570-577, \n July-August 1995.\n - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques\n to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) \n 163-171.',
'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
1.189e-01],
[2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
8.902e-02],
[1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
8.758e-02],
...,
[1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
7.820e-02],
[2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
1.240e-01],
[7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
7.039e-02]]),
'feature_names': array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
'mean smoothness', 'mean compactness', 'mean concavity',
'mean concave points', 'mean symmetry', 'mean fractal dimension',
'radius error', 'texture error', 'perimeter error', 'area error',
'smoothness error', 'compactness error', 'concavity error',
'concave points error', 'symmetry error',
'fractal dimension error', 'worst radius', 'worst texture',
'worst perimeter', 'worst area', 'worst smoothness',
'worst compactness', 'worst concavity', 'worst concave points',
'worst symmetry', 'worst fractal dimension'], dtype='<U23'),
'filename': '/usr/local/lib/python3.7/dist-packages/sklearn/datasets/data/breast_cancer.csv',
'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,
0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,
1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,
1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]),
'target_names': array(['malignant', 'benign'], dtype='<U9')}
type(cancer)
#요거는 dictionary data type이다.
sklearn.utils.Bunch
cancer.keys()
dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])
cancer['target_names'] #이것의 value 볼거임.
#malignant는 0 , benigh은 1
array(['malignant', 'benign'], dtype='<U9')
cancer['target']
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,
0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,
1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,
1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1])
cancer['target'].shape
(569,)
cancer['data']
array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
1.189e-01],
[2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
8.902e-02],
[1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
8.758e-02],
...,
[1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
7.820e-02],
[2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
1.240e-01],
[7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
7.039e-02]])
type(cancer['data'])
numpy.ndarray
cancer['data']. shape
# row(세로; patients)는 569개, column(feature 수)은 30.
(569, 30)
cancer['feature_names']
#30개의 feature 있다. cell images임.
#즉, 각 patients는 30feature 있음.
#target은 row가 569, column 없음. 즉, 0,1,1,1,0, 이런 게 569개임.
array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
'mean smoothness', 'mean compactness', 'mean concavity',
'mean concave points', 'mean symmetry', 'mean fractal dimension',
'radius error', 'texture error', 'perimeter error', 'area error',
'smoothness error', 'compactness error', 'concavity error',
'concave points error', 'symmetry error',
'fractal dimension error', 'worst radius', 'worst texture',
'worst perimeter', 'worst area', 'worst smoothness',
'worst compactness', 'worst concavity', 'worst concave points',
'worst symmetry', 'worst fractal dimension'], dtype='<U23')
target = cancer['target']
data = cancer['data']
# 이거 둘은 label임.
data.shape
(569, 30)
target.shape
(569,)
전에는 datasets.boston_hosing.load_data()가 저절로 training data와 test data가 들어갔음.
근데 이거는 그냥 data만 569개가 주어지는 거임. 나뉘어서 주어지는 게 아니구.
그래서 이걸 train data와 test data로 우리가 split 해주어야 해.
# 1.2 Train/Test Split
from sklearn.model_selection import train_test_split #이거는 function임.
# 2. Modeling (Hypothesis)
# 2.1 Import LinearRegression Class
from sklearn.linear_model import LogisticRegression
train_data, test_data, train_label, test_label = train_test_split(data, target, test_size = .2)
#이 말은 training:test = 8:2로 나누겠단 소리임.
지금 이렇게 data가 train과 test용으로 나뉘어져서 주어지는지, 그냥 통째로 주어지는지는 housing 문제와 cancer 문제 특성에 따른 차이인가요, 아니면 linear regression, logistic regression 차이 인가요?
keras는 datasets 있다. boston housing,.. 등등
sklearn은 datasets 있다.boston housing, breast cancer, ... 등등
keras는 train_data, train_label, test_data, test_label으로 나누어서 주어짐.
sklearn은 그냥 통째로 주기 때문에, from sklearn.model_selection import train_test_split 으로 우리가 스스로 나눠야 함.
sklearn이 realistic 문제랑 비슷한거지. keras처럼 나눠서 주어지는 게 아니니까....
train_data.shape
#569의 80퍼
(455, 30)
test_data.shape
#569의 20퍼
(114, 30)
train_label.shape
(455,)
test_label.shape
(114,)
이제 잘 나뉘어졌네!
# 2.1 Import LinearRegression Class
from sklearn.linear_model import LogisticRegression
# 2.2 Instance (object) creation
model = LogisticRegression( )
# LogisticRegression 가 class
# model이 instance
y = sigmoid ( w1x1 + w2x2 + ... + w30x30 + b )
sigmoid(x) = 1/ (1+exp(-x))
# 3. Training
model.fit(train_data, train_label)
#이걸 씀으로써 each w1, w2, .., w30, b를 계산함 지가 알아서
/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter=100,
multi_class='auto', n_jobs=None, penalty='l2',
random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
warm_start=False)
# 4. Evaluation
#이제 predict 해보자. 이 array가 predict된 값들임.
prediction = model.predict(test_data)
prediction
array([1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1,
1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0,
1, 0, 0, 1])
test_label #요게 true value.
array([1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1,
1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0,
0, 0, 0, 1])
test_label.shape
(114,)
#predict된 거랑 true value랑 비교해보자.!
prediction == test_label
array([ True, True, True, True, True, True, True, True, True,
False, True, True, False, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, False, True,
True, True, True, True, True, True, True, True, True,
False, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, False, True, True,
False, False, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, False, True, False, True, True, True,
True, True, False, True, True, True])
err = sum( prediction == test_label ) / len(test_label)
err
#이게 accuracy
0.9122807017543859
print (f"Accuracy는 {err*100:.2f} % 이다. ")
Accuracy는 91.23 % 이다.
model.coef_
array([[ 0.67909084, 0.50701504, 0.43432104, -0.02272138, -0.02564145,
-0.12170047, -0.16078863, -0.0683831 , -0.03784695, -0.00847199,
0.02065279, 0.19986506, 0.09158545, -0.1224808 , -0.00195854,
-0.02477716, -0.03070973, -0.00852449, -0.00824051, -0.00197348,
0.66507928, -0.47527486, -0.31649682, -0.00716629, -0.04790078,
-0.39662103, -0.45906953, -0.13920924, -0.12219758, -0.03868945]])
len(model.coef_)
1
model.coef_.shape #2차원이다. 30features를 의미. w1, w2, ..., w30까지!
(1, 30)
30개의 feature 중 which feature(parameter)가 중요할까?
coefficient = model.coef_
coefficient
array([[ 0.67909084, 0.50701504, 0.43432104, -0.02272138, -0.02564145,
-0.12170047, -0.16078863, -0.0683831 , -0.03784695, -0.00847199,
0.02065279, 0.19986506, 0.09158545, -0.1224808 , -0.00195854,
-0.02477716, -0.03070973, -0.00852449, -0.00824051, -0.00197348,
0.66507928, -0.47527486, -0.31649682, -0.00716629, -0.04790078,
-0.39662103, -0.45906953, -0.13920924, -0.12219758, -0.03868945]])
ordered_coefficient = np.sort(coefficient) #작은 것부터 정렬함.
ordered_coefficient
array([[-0.47527486, -0.45906953, -0.39662103, -0.31649682, -0.16078863,
-0.13920924, -0.1224808 , -0.12219758, -0.12170047, -0.0683831 ,
-0.04790078, -0.03868945, -0.03784695, -0.03070973, -0.02564145,
-0.02477716, -0.02272138, -0.00852449, -0.00847199, -0.00824051,
-0.00716629, -0.00197348, -0.00195854, 0.02065279, 0.09158545,
0.19986506, 0.43432104, 0.50701504, 0.66507928, 0.67909084]])
ordered_features = np.argsort(coefficient) #위에꺼 index순서를 알려줌.
ordered_features
array([[21, 26, 25, 22, 6, 27, 13, 28, 5, 7, 24, 29, 8, 16, 4, 15,
3, 17, 9, 18, 23, 19, 14, 10, 12, 11, 2, 1, 20, 0]])
cancer.keys()
dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])
cancer['feature_names']
array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
'mean smoothness', 'mean compactness', 'mean concavity',
'mean concave points', 'mean symmetry', 'mean fractal dimension',
'radius error', 'texture error', 'perimeter error', 'area error',
'smoothness error', 'compactness error', 'concavity error',
'concave points error', 'symmetry error',
'fractal dimension error', 'worst radius', 'worst texture',
'worst perimeter', 'worst area', 'worst smoothness',
'worst compactness', 'worst concavity', 'worst concave points',
'worst symmetry', 'worst fractal dimension'], dtype='<U23')
feature_importance = []
for i in ordered_features :
feature_importance.append( cancer['feature_names'][i] )
feature_importance
[array(['worst texture', 'worst concavity', 'worst compactness',
'worst perimeter', 'mean concavity', 'worst concave points',
'area error', 'worst symmetry', 'mean compactness',
'mean concave points', 'worst smoothness',
'worst fractal dimension', 'mean symmetry', 'concavity error',
'mean smoothness', 'compactness error', 'mean area',
'concave points error', 'mean fractal dimension', 'symmetry error',
'worst area', 'fractal dimension error', 'smoothness error',
'radius error', 'perimeter error', 'texture error',
'mean perimeter', 'mean texture', 'worst radius', 'mean radius'],
dtype='<U23')]
맨 뒤에꺼가 제일 중요.
매번 시도할 때마다 다름.
gradient descend method 에 따라 w1~w30가 ramdom number로 책정되기(?) 때문이다.
from sklearn.linear_model import LinearRegression
model_linear = LinearRegression()
model_linear.fit(train_data, train_label)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
model_linear.predict(test_data) #이건 0,1가 아니라 regression problem
array([ 0.82958726, -0.0468439 , 0.66335586, 1.09076769, 1.10878327,
0.77038134, 0.83021331, 0.26592654, 0.00218734, 0.61928817,
0.91521736, 0.24919443, 0.6411021 , 0.29329216, 0.77428153,
1.04935029, 0.73337761, -0.12183006, 0.97854904, -0.02038955,
-0.03084984, 0.78977298, 0.56479717, 0.57452942, 0.66556579,
0.93305669, 0.76510222, 0.95557409, -0.09225518, 0.9786988 ,
0.80927492, 0.92922175, 1.09128973, -0.21974031, 0.72266751,
0.16714178, 0.32522191, 0.74506921, 0.79064742, -0.02231387,
0.91247878, -0.03209831, 0.03023182, 0.94682078, 1.00240809,
0.71164806, 0.68630679, 0.97837808, 1.02111614, 1.00225203,
-0.19565607, 0.08323068, 0.84780384, 0.82561435, 1.44655529,
-0.10444867, 0.84665081, 1.28556365, 0.92005851, 0.46845792,
0.95119737, 0.95451611, 0.67159841, 0.76225707, 0.73546 ,
0.13724965, 0.22663824, -0.21060108, 0.93591299, 0.84379665,
0.61048998, 0.60842576, 0.48219573, 0.70648477, 1.06404222,
0.26365613, 0.85124995, 1.00474478, 0.72756632, 0.98279584,
0.94339108, 0.57039288, 0.75205684, 0.96371311, -0.10352382,
0.13311175, 0.84577004, 0.69554115, 0.16946259, 0.65732328,
0.651846 , 1.194728 , 0.80924084, 0.85509982, 0.77989717,
0.92918605, -0.32992307, 0.73980673, 1.0821322 , 0.10125035,
0.82289594, 0.48386663, 0.55732581, 1.13703911, 0.57026168,
0.80023522, 0.95921886, -0.36222435, 1.15013267, -0.26593946,
0.40693039, 0.32296772, 0.17000705, 0.85939615])
import numpy as np
a = np.array([1,2,3,4])
b = np.array([1,2,2,3])
a==b
#true는 1, false는 0임.
array([ True, True, False, False])
Linear regression : regression only
logistic regression : classification only
K-NN, SVM, Decision Tree, Random Forest, XGBoost : both regression and classification
Basic algorithms : linear regression, logistic regression, Decision Tree
Advanced algorithms : SVM, Random Forest
Super-Advanced algorithms(King of ML) : XGBoost(2018)
가로축 : 할당량이라는 feature
세로축 : HBP라는 feature
o : cardiovascular disease
x : healthy
train model하지 않는다.
k = 3라 하면, 세모에서 가까운 세 개 찾음.
2개가 o, 1개가 x니까
세모는 o라고 판단 내림.
k=5라 하면 o:x = 3:2니까 역시 o라고 판단.
#방법1
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
# 어차피 이 함수 하나만 쓸거니까 이 방식을 더 많이 씀. 메모리 절약
same as above
#방법2
from sklearn import datasets
cancer = datasets.load_breast_cancer()
data = cancer['data']
target = cancer['target']
from sklearn.model_selection import train_test_split
train_data, test_data, train_label, test_label = \
train_test_split(data,target, test_size=0.2)
train_data[0]
#악성, 양성을 판가름 하는 30개의 feature 있다.
#test data와 target data를 랜덤하게 8:2로 나누기 때문에 계속 달라짐.
array([1.388e+01, 1.616e+01, 8.837e+01, 5.966e+02, 7.026e-02, 4.831e-02,
2.045e-02, 8.507e-03, 1.607e-01, 5.474e-02, 2.541e-01, 6.218e-01,
1.709e+00, 2.312e+01, 3.728e-03, 1.415e-02, 1.988e-02, 7.016e-03,
1.647e-02, 1.970e-03, 1.551e+01, 1.997e+01, 9.966e+01, 7.453e+02,
8.484e-02, 1.233e-01, 1.091e-01, 4.537e-02, 2.542e-01, 6.623e-02])
from sklearn.model_selection import train_test_split
train_data, test_data, train_label, test_label = train_test_split(data,target, test_size=0.2, random_state=99999)
train_data[0]
#이렇게 random_state로써 아무 숫자(seed number; 0~2**32 -1까지만)나 딱 입력해주면 고정이 됨 걍.
array([9.676e+00, 1.314e+01, 6.412e+01, 2.725e+02, 1.255e-01, 2.204e-01,
1.188e-01, 7.038e-02, 2.057e-01, 9.575e-02, 2.744e-01, 1.390e+00,
1.787e+00, 1.767e+01, 2.177e-02, 4.888e-02, 5.189e-02, 1.450e-02,
2.632e-02, 1.148e-02, 1.060e+01, 1.804e+01, 6.947e+01, 3.281e+02,
2.006e-01, 3.663e-01, 2.913e-01, 1.075e-01, 2.848e-01, 1.364e-01])
from sklearn.neighbors import KNeighborsClassifier
# KNeighborsClassifer, KNeighborsRegressor, KNeighborsTransformer 중 선택할 수 있음.
#model = KNeighborsClassifier()
model = KNeighborsClassifier(n_neighbors=3)
# 파라미터 : 괄호 안에 n_neighbots=3 이렇게 K를 설정할 수있음. default는 5임.
# 파라미터 : weights로 uniform, distance 설정 가능. default는 uniform임. uniform은 equally weighted. distance는 짧은 거리인거를 더 more weight 쳐줌.
model.fit(train_data, train_label)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=3, p=2,
weights='uniform')
prediction = model.predict(test_data)
prediction
array([0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,
1, 1, 1, 0])
prediction == test_label
array([ True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, False,
True, True, False, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, False])
accuracy = sum(prediction == test_label) / len(test_label) * 100
accuracy
95.6140350877193
print(f"accuracy는 {accuracy:.2f} %이다.")
accuracy는 95.61 %이다.
support vector간 거리 : width = w
w가 maximize되는 게 좋다.
근데 문제는,
이런 데이터는 못..함...
이렇게 하기야 하겠지만 못하는 애들(panelty)가 있음.
slack(원래 support vector로부터의 거리), panelty constant를 최소화 해야함.
penalty x penalty constant
딜레마 : maximize w를 해야하면서, minimize penalty 해야함.
그림처럼 원형인 데이터는 x는 위로, o는 아래로 내려서 분류할 수 있음.
gaussian kernel function
# Import library
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC #support vector classifier
# Load data
cancer = load_breast_cancer()
data = cancer['data']
target = cancer['target']
# Split data
train_data, test_data, train_label, test_label = train_test_split(data, target, test_size=0.2, random_state=999)
# Model training
model = SVC(C=0.1) #C의 default는 1이였음. more penalty 주기 위해 0.5로 함(?) decrease penalty 하려면 C=2
model.fit(train_data, train_label)
# Evaluation
prediction = model.predict(test_data)
accuracy = sum(prediction == test_label) / len(test_label) *100
print(accuracy)
84.21052631578947
cancer는 dictionary type이고, key인 'data'를 data에 넣음.
key인 'target'을 target에 넣음.
이것은 supervised learning임.
그 다음은 split data를 함. train과 test로!
random_state = ??어떤 숫자를 해줌으로써 seed를 딱 해줌. 안바뀌게!
그 다음은 # Model training
model = SVC() class의 instance를 model에 넣음.
model.fit(train_data, train_label) 하면 train 다 된거임.
그 다음은 # Evaluation
prediction이랑 test_label 이랑 비교함.
코드 맨 첫부분엔 import library를 한다.
# Import Library
import numpy as np
from tensorflow import keras
지난시간엔 SVC (support vector machine)를 짧게 설명했다.
이거 다 배울라면 1달 넘게 걸림...
이젠 더 디테일하게 SVC 배우겠따!
2D 그래프,
가로축 x1(feature)
세로축 x2(another feature)
circle들과 x들이 있다.
SVC는 그 경계를 찾는 것! support vector를 찾는 것임.
support vector와 중심선은 curve가 아니고 line임.
중심선이 hyper plane임.
3D 그래프
x1, x2, x3의 세가지 feature이 축임.
이것도 마찬가지로 plane으로 영역 구분 가능.
4D 면 3D로 영역 구분,
5D 면 4D로 영역 구분.
뭐가됐든 어쨋던 hyperplane임.
hyperplane은 2 group으로 seperating하는 애임. => seperating hyperplane (line, plane, 3D, 4D 등등일 수 있음.)
2D 문제에서, hyper plane은 w1x1 + w2x2 + b로 표현 가능.
첫번째그룹 : w1x1 + w2x2 + b >= 0 임. (동그라미들)
두번째그룹 : w1x1 + w2x2 + b < 0 임. (엑스들)
젤 가까운 동글은 +1,
젤 가까운 엑스는 -1
젤 가까운 동글은 w1x1 + w2x2 + b = +1
젤 가까운 엑스는 w1x1 + w2x2 + b = -1 이다.
젤 가까운 동글 과 w1x1 + w2x2 + b = 0 line과의 거리는,
| w1x1+ w2x2 + b | / (w1^2 + w2^2 )^(0.5)
= 1 / (w1^2 + w2^2 )^(0.5) = 1/ |w|
support vector간 whole distance는 2 / (w1^2 + w2^2 )^(0.5) = 2/ |w| 이다.
support vector machine은 그 거리를 maximize 해야 한다. 즉, margin을 maximize 하는 것. 2 / |w| 를 mximize 해야 함.
이걸 하기 위해, 역수를, 즉 (w1^2 + w2^2 )^(0.5) / 2를 minimize 해보자!!
즉 w1^2 + w2^2 를 minimize 해보자!
이건 two features니까 이런 식이 나온 거임.
만약 1000 features 문제였더라면 (w1^2 + w2^2 + ... + w1000 ^2 )^(0.5) 를 minimize 해야햇을 거임.
이건 결국 (1/2) w ??????????????
largrange 라그랑주가 이 fomulation 찾음.
🧸 1. not always seperable : line만으로 완벽히 o과 x를 나눌 수 없다 ㅜ.
1/2 w벡터T w벡터 + c*Sum( ) 를 minimize 해야함.
우리는 optimal c를 찾아야 함!!
원래는 c = 3, c =5 이런걸로 했었음.
penalty를 얼마나 줄지가 c 임.
🧸 2. 만약 x가 가운데에 원모양이구, 그 주변을 o가 둘르고 있다면, linear equation인 w1x1+w2x2 + w3x3 + ... 가지고는 표현 불가능. 그래서 변경(transfer, transform) 해줘야 함.
이건 gaussian 모양이다. 가운데에 x가 몰려있으니까. 3D로 바꿔서 위에있는 게 x, 아래있는 게 o라고 plane으로 나눔.
rbf : radial basis function 가지고 함.
w1x1 + w2x2 + b 와 같은 문제였더라면
w1x1^2 + w2 x2^2 + w3 x1x2로 바꿈.
w1x1 + w2x2 + w3x3 + b 와 같은 문제였더라면
w1x1^3 + w2 x2^3 + w3 x3^3 + w4 x1x2x3 + w5 x1제곱 x2 + w6 x1제곱 x3 + w7 x2제곱 x1 + .... 꼴의 polynomial로 바꺼야댐.
kernel 값은 default = 'rbf'임. 'linear', 'poly', 'rbf'로 할 수 있음 (?)
rbf에서 degree= 3 가 default임. degree =5 면 다 5승임.
model = SVC (kernel = 'poly', degree = 5)로 바꿀 수 있음.
rbf 방법이 제일 popular, 근데 어떤 경우는 poly가 나을 수도.
결국 우리는 C, kernel, degree를 결정해야 함.
Q ) 방금 알려준 위 아래로 나누는 게 rbf라는 거임 아님 poly란 거임? 그리고 왜 rbf가 디폴트임??
🧸 SVC랑 SVR 있음.
//43쪽
decision tree
root에서 internal node로까지만, 즉 첫번째로 갈라지는 게 depth1
leaf node까지 갈라지는 게 depth 2
우리는 information gain을 maximize해야함. 어케 갈라지는지를 최대한 자세히(?).
//44 symbol notation
parent node와 child node 있다.
Dp : 부모의 dataset
Np : 부모의 number of samples
Impurity를 계산할 수 있다.
'#1' : 50퍼의 정확도임. higher impurity
(부모) 병있는 그룹 100, 건강 그룹 100을
(자식1) 병50, 건강50
(자식2) 병50, 건강50
으로 나누고 싶음.
'#2' : 이게 더 나은 방법임. 80퍼의 정확도. lower impurity
(부모) 병있는 그룹 100, 건강 그룹 100을
(자식1) 병20, 건강80
(자식2) 병80, 건강20
으로 나누고 싶음.
dicision tree는 2 group으로 나누는 거임.
두 그룹간 차별을 둬야 함. discriminate two groups.
last group은 only 1 group should be.
'#2' 처럼 되려면, impurity를 낮춰야 함.
자꾸자꾸 자식으로 내려갈 수록 impurity를 낮춰야 함.
I-G(t)는 Gini impurity at the node t
impurity 계산하는 방법은 ~~~
세번째 그림은 only one group 이니까 pure하고, impurity=0
node가 impurity = 0 될 때까지 반복하는 거임.
맨 왼쪽이 worst, 맨 오른쪽이 best.
I_H (t) Entroyp at the node t
맨 왼쪽이 I_H(t) = 1 , 안좋다
맨 우측이 I_H(t) =
가운데가 50:50, gini impurity 최대인 곳, entropy가 최대인 곳.
둘이 비슷하니까 원하는 거 하믄 돼.
sklearn 에선 gini impurity가지고 평가하는 게 default임.
parent node가 worst
gini 가지고 평가해보니, parent는 0.5임.
자식1은 ~~~.
자식 2는 ~~~.
자식1에게 갈 애들은 0.32 * (50/200)
자식에게 갈수록 impurity 적어짐./
informaion gain인 IG(Dp,f) 을 maximize 해야 함.
왼쪽보다 오른쪽이 나음.
이게 Deicision Tree란 거임.
c.f.)
decision 해볼라구 tree 해보겠다
# Import library
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# Load data
cancer = load_breast_cancer()
data = cancer['data']
target = cancer['target']
# Split data
train_data, test_data, train_label, test_label = train_test_split(data, target, test_size=0.2, random_state=999)
# Model training
model = DecisionTreeClassifier()
model.fit(train_data, train_label)
# Evaluation
prediction = model.predict(test_data)
accuracy = sum(prediction == test_label) / len(test_label) *100
print(accuracy)
91.22807017543859
💡 model = DecisionTreeClassifier() 에서의 파라미터
💡 파라미터 : criterion 은 디폴트가 'gini'임. 이거 말고 'entropy'로 해두 댐.
💡 파라미터 : splitter = 'best' 는 maximize information gain 하게 해주는 것.
💡 파라미터 : max_depth = None 이다. max_depth를 정할 수도 있음. 요게 가장 중요한 파라미터. performance는 나빠지겠지만 일단 뭐... 너무 깊은 deep tree는 accuracy가 나빠짐. 너~무 많이 해서.
ex) model = DecisionTreeClassifier(max_depth = 4) 등으로 할 수 있음.
💡 파라미터 : min_samples_split = 2 가 디폴트임.
Random forest 를 설명하고 끝내겠다.
decision tree가 가끔 bad performance 보여줌. 왜 그럴까? 를 생각해봤더니
oulier가 affects the decision tree.
그래서 어케 modify 할까..?
small decision tree 만들자!
예를들어, 1000 data, 100 features 가 원래 있는 문제인건데,
랜덤히 뽑힌 100 data, 10 features 갖고만 tree 만들고,
또 다른 걸로 tree 만들고 함.
숲을 만듦.
각 나무는 decision 있다.
그리고 이제 각 나무들 중 vote한다.
우리는 30 features 있다. for each patient.
0(malignant)인지 1(benign)인지 결정함.
어떤 feature는 중요, 어떤 feature는 별로 안중요.
그래서 어떤 feature가 중요한지 결정해야 함.
# Import library
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# Load data
cancer = load_breast_cancer()
data = cancer['data']
target = cancer['target']
# Split data
train_data, test_data, train_label, test_label = train_test_split(data, target, test_size=0.2, random_state=999)
# Model training
model = DecisionTreeClassifier()
model.fit(train_data, train_label)
# Evaluation
prediction = model.predict(test_data)
accuracy = sum(prediction == test_label) / len(test_label) *100
print(accuracy)
90.35087719298247
type(cancer) # 이것은 dictionary 이다.
sklearn.utils.Bunch
cancer.keys()
dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])
cancer['feature_names']
array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
'mean smoothness', 'mean compactness', 'mean concavity',
'mean concave points', 'mean symmetry', 'mean fractal dimension',
'radius error', 'texture error', 'perimeter error', 'area error',
'smoothness error', 'compactness error', 'concavity error',
'concave points error', 'symmetry error',
'fractal dimension error', 'worst radius', 'worst texture',
'worst perimeter', 'worst area', 'worst smoothness',
'worst compactness', 'worst concavity', 'worst concave points',
'worst symmetry', 'worst fractal dimension'], dtype='<U23')
model.feature_importances_
#숫자 높은게 영향 많이 끼치는 애들
#0인 것은 영향을 안미치는 애들
array([0. , 0.00720941, 0.0137322 , 0. , 0. ,
0.01523722, 0. , 0. , 0. , 0. ,
0.00236753, 0.02403135, 0. , 0.01855336, 0. ,
0. , 0.02698257, 0. , 0. , 0.01380262,
0.02129366, 0. , 0.73950172, 0. , 0.00926378,
0. , 0. , 0.10802458, 0. , 0. ])
for i in range(30) :
print(f"{model.feature_importances_[i] : .4f}, {cancer['feature_names'][i]}" )
0.0000, mean radius 0.0072, mean texture 0.0137, mean perimeter 0.0000, mean area 0.0000, mean smoothness 0.0152, mean compactness 0.0000, mean concavity 0.0000, mean concave points 0.0000, mean symmetry 0.0000, mean fractal dimension 0.0024, radius error 0.0240, texture error 0.0000, perimeter error 0.0186, area error 0.0000, smoothness error 0.0000, compactness error 0.0270, concavity error 0.0000, concave points error 0.0000, symmetry error 0.0138, fractal dimension error 0.0213, worst radius 0.0000, worst texture 0.7395, worst perimeter 0.0000, worst area 0.0093, worst smoothness 0.0000, worst compactness 0.0000, worst concavity 0.1080, worst concave points 0.0000, worst symmetry 0.0000, worst fractal dimension
이제 얘를 정렬해보자.......
type(model.feature_importances_) #이건 numpy.ndarray 이다.
numpy.ndarray
model.feature_importances_.shape #이건 1D이고, 30개있다.
(30,)
import numpy as np
np.sort( model.feature_importances_ ) #이건 assending 해서 보여준다.
array([0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0.00236753, 0.00720941,
0.00926378, 0.0137322 , 0.01380262, 0.01523722, 0.01855336,
0.02129366, 0.02403135, 0.02698257, 0.10802458, 0.73950172])
sorted_feature_importances = np.sort( model.feature_importances_ )
sorted_feature_importances
array([0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0.00236753, 0.00720941,
0.00926378, 0.0137322 , 0.01380262, 0.01523722, 0.01855336,
0.02129366, 0.02403135, 0.02698257, 0.10802458, 0.73950172])
sorted_feature_importances = sorted_feature_importances[::-1] #하면 descending해준다. 중요!
sorted_feature_importances
array([0.73950172, 0.10802458, 0.02698257, 0.02403135, 0.02129366,
0.01855336, 0.01523722, 0.01380262, 0.0137322 , 0.00926378,
0.00720941, 0.00236753, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ])
sorted_feature_index = np.argsort(model.feature_importances_)
sorted_feature_index = sorted_feature_index[::-1]
sorted_feature_index
array([22, 27, 16, 11, 20, 13, 5, 19, 2, 24, 1, 10, 3, 4, 6, 7, 8,
9, 29, 14, 12, 28, 15, 17, 18, 21, 23, 25, 26, 0])
for i in sorted_feature_index : #이렇게 하면 i는 22, 27, .... 임.
print(f"{model.feature_importances_[i] : .4f}, { cancer['feature_names'][i] }" ) #worst perimeter가 제일 중요한 파라미터인 것을 알 수 있따.
#0인 것은 영향 안끼친단 소리임. feature 넘 많다고 생각하면 이것들을 exclude할 수 있다.
0.7395, worst perimeter 0.1080, worst concave points 0.0270, concavity error 0.0240, texture error 0.0213, worst radius 0.0186, area error 0.0152, mean compactness 0.0138, fractal dimension error 0.0137, mean perimeter 0.0093, worst smoothness 0.0072, mean texture 0.0024, radius error 0.0000, mean area 0.0000, mean smoothness 0.0000, mean concavity 0.0000, mean concave points 0.0000, mean symmetry 0.0000, mean fractal dimension 0.0000, worst fractal dimension 0.0000, smoothness error 0.0000, perimeter error 0.0000, worst symmetry 0.0000, compactness error 0.0000, concave points error 0.0000, symmetry error 0.0000, worst texture 0.0000, worst area 0.0000, worst compactness 0.0000, worst concavity 0.0000, mean radius
Decision tree 예제.
XGBoost 오늘 배울꺼임.
XGBoost로 top 3 feature 뽑았고, 그게 LDH, hs-CRP, Lymphocyte 임.
이거가지고 죽을지, 살을지 결정.
우리는 sklearn으로 한다. import sklearn
many, small decision tree
new data가 각 tree에 들어가고,
각 tree가 results 만들어냄 : A, B, A, A, B, A, B, B, A
4A와 3B 이므로 결론은 A이다.
🌟 Many decision trees + bagging(Bootstrap Aggregating) + Feature Randomness
Bagging : random data sampling with replacement. 만약 3 data가 있는데, 중복을 허용해서 3개 선택함. ex) 2 1 2 이렇게 선택됨. 1000 data 있고, 1000 data를 뽑고 싶은 거라면, 그 1000 data엔 same data may exist. 그래서 원래의 1000data랑 뽑힌 거랑 다름.
Randomness : 랜덤하게 뽑음.
# Import library
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# Load data
cancer = load_breast_cancer()
data = cancer['data']
target = cancer['target']
# Split data
train_data, test_data, train_label, test_label = train_test_split(data, target, test_size=0.2, random_state=999)
# Model training
model = RandomForestClassifier()
model.fit(train_data, train_label)
# Evaluation
prediction = model.predict(test_data)
accuracy = sum(prediction == test_label) / len(test_label) *100
print(accuracy)
93.85964912280701
이 방법이 decision tree 그거보다 더 정확함.
근데 그때그때 다름.
random_state = 999 이렇게 해줬는데 왜 다를까?
every single tree가 different data를 가짐.
또, feature가 random selected 되어서 그때마다 달라지는 것임.
💡 model = DecisionTreeClassifier()의 파라미터
💡 n_estimators = 100. small tree의 개수
💡 criterion = 'gini'. 아니면 'entropy' 가능.
💡 max_depth
💡 min_samples_split = 2
💡 min_samples_leaf = 1
💡 max_features = 'auto'. how many feature do you want to choose. default인 'auto'는 sqrt(n_features) 임. 100 feature면 이거 10로 함. 우리는 30feature니까 약 5.몇이니까 5개 선택함.
💡 bootstrap = True 는 randomly 하고 중복 허용. 1000sample 있었으면 1000sample 선택. (개수 같게) 만약 False로 하면 original data로 걍 한단 소리임.
💡 max_samples = None이고, int나 float로도 가능. none이면 기존 data를 그대로 사용(ex.1000개 데이터 있으면 그 1000개 그대로 다 사용), int인 100으로 하면, 100개의 data 가져와서 사용. floating이라고 쓰면, 0.1이라고 쓰면 10퍼만 갖고온다는 뜻.
train_data.shape
#하면 455 중에 455개를 뽑은 거임. 각 나무마다 다른 455개 데이터가 들어감. decision tree 개수는 100
(455, 30)
train_label.shape
(455,)
test_data.shape
(114, 30)
n_estimgators = 1000, bootstrap = 200, max_features = 3 로 하면,
# Import library
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
# Load data
cancer = load_breast_cancer()
data = cancer['data']
target = cancer['target']
# Split data
train_data, test_data, train_label, test_label = train_test_split(data, target, test_size=0.2, random_state=999)
# Model training
model = XGBClassifier()
model.fit(train_data, train_label)
# Evaluation
prediction = model.predict(test_data)
accuracy = sum(prediction == test_label) / len(test_label) *100
print(accuracy)
93.85964912280701
이게 제일 powerful
sklearn은 XGBoost 지원 안함.
from xgboost import XGBClassifier
model = XGBClassifier()
single : logistic regression, SVM, decision Tree
bagging : Random Forest : parallel : 여러 개 만들고 그중에 vote.
Boosting : model 만들고 계속 업그레이드 함. : sequential
4 data (주황, 연두, 연두, 주황) 있다고 하자.
drug dosage 따른 효과 있는지 없는지 판단. 즉 feature가 걍 한 개만..
8, 12 여야 effective (1) 5, 15면 non effective (0)
먼저, all initial prediction probability of 0.5 regadless of the dosage.
몇 mg이더라도 다 확률 0.5라고 해놓고 시작.
Residual : the differences between the observed and predicted values.
Similarity scores 얻음.
(식)
(0.5) * (1-0.5) + 0.5 x (1-0.5) + 0.5 x (1-0.5) + 0.5 x (1-0.5)
-0.5 + 0.5 + 0.5 - 0.5 를 square
이건 0임.
람다는 0 만드는 거 (?)
Make a tree
root로부터 leaf 찾아야 함.
이 네개를 나누는 방법엔 3가지 있음.
3:1, 2:2, 1:3
impurity랑 similarity랑 반대임
root는 impurity 크고(안좋음) , similarity 작다.
child 로 갈수록 impurity 줄여야 하고, similarity 커져야 함.
🍰 XGBoost
simularity score로 각각각 네모 판단
simularity score이 자식으로 갈수록 up 된다.
Information Gain =
( child_left similarity + child_right similarity ) - Parent similarity
🍰 Decesion Tree
impurity score이 자식으로 갈수록 down 된다.
Information Gain =
Parent Impurity - ( child_lefr impurity + child_right impurity )
각 leaf는 number가지고 있다.
data -> blackbox -> 3 (number)
output이 2이기도 함. 즉, 걍 숫자임.
output이 나오는 확률이 있음. related to probability
blackbox에 따라 3이 나오는 확률 있고, 2 나오는 확률이 따로 있음.
확률은 0~100% 임.
0.5는 50% 의미.
0.7는 70% 의미.
근데,,,,,,,,,,, 3이라고 나오면 확률이 뭔지 어케 알지?
혹은 output이 -4면?
작은 숫자가 더 작은 확률임.
이걸 0~1로 어케 바꿀까?
p는 0~1 값 갖는 확률.
odds = p/(1-p) 로 정의됨.
p에 따라 odd가 그래프가 이런 꼴임.
p가 0~1이면
odds는 0~무한대
probability to log
odd에 log 취함.
그러면 log odd 는 -무한 ~ 무한
Probability = ~~~~
블랙박스에 data 넣으면
그 결과(확률을 의미)가 -무한 ~ +무한으로 나옴.
블랙박스의 결과가 -2 +2 -2 면
-2, +2를 odd에 넣어서 probability 구할 수 있따
Soft max 는
image인 data를 넣으면
deep neural network인 model이 3 things(곰, 개, 고양이)로 classify 하는 model이,
곰이라고 판단해야 함.
bear 37, cat -1 dog -2 로 결과 냄.
~ 로 계산해서 개에 대한 확률,
고양이 확률,
곰일 확률을 계산할 수 있다.
개일 확률 :
exp(-2) / exp(-2) + exp(-1) + exp(37)
XGBoost로 하니까 전보다 더 빨강, 연구 구분 잘하게 됨.
learning weight인 0.3을 조절할 수 있음.파라미터로써. deafult는 0.1임.
그럼 output인 -2가 -0.6인 셈임.
그 -0.6을 확률식에다가 넣는거임.
이렇게, 처음보다 확률이 달라졌음.
이 경우에 similarity scores left, right, gain 구해보니 이와 같음.
import xgboost as xgb
model_xg = xgb.XGBClassifier()
#혹은 같은 의미로,
from xgboost import XGBClassifier
model_xg = XGBClassifier()
XGBClassifier() 안에 넣을 수 있는 파라미터
learning_weight = 0.1가 default임.
#2
a='100'
b='1000'
print(a+b)
a+b
1001000
'1001000'
#3
a={'students': ['Amy', 'Joe', 'Paul'] , 'score' : [100,99,98]}
type(a['students'])
list
#4
print(tuple(a.keys()))
('students', 'score')
#5
#print(a[0])
#6
a=3
b=0
c=a**b + a*b
print(c)
1
#7
a = 'Hello'
b = 'Python'
c = a[-1] + b[1:3]
print(c)
oyt
#8
a = 'Python'
print(a[::-1][-1])
P
#9
my_string = "passion"
#my_string[0:2] ="ap"
#print(my_string)
my_string
'passion'
#10
my_string = 'Student said "Python is fun"'
result = my_string.split('"')
print(result)
['Student said ', 'Python is fun', '']
#11
my_words = 'I am hungry'
new_word = my_words[9::-3]
print(new_word)
rumI
#12
x = 'learning'
y = x.upper() + x.lower()
print(y)
LEARNINGlearning
#13
my_words = "I am good at python programming"
my_num = my_words.find('a') + my_words.count('a')
print(my_num)
5
#14
my_words = "** python programming **"
new_words = my_words.strip(' *')
print(new_words)
python programming
#15
a=10
b=3
add = a+b
dev = a/b
print(f"The addtion is {add} and the devision is {dev:1.2f}")
The addtion is 13 and the devision is 3.33
#16
a={1,2,3,4,5}
b=a.copy()
c=a.union(b)
print(c)
{1, 2, 3, 4, 5}
#17
a={1,2,3,4,5}
a.update([3,7])
print(a)
{1, 2, 3, 4, 5, 7}
a="Nonono non non"
print(a.istitle(), a.isalpha() )
False False
print("""gyurin""")
gyurin
table data 있다.
feature1, feature2, feature3, ...
sklearn library 이용해서 ML 적용
엑셀 파일에 있는 거 열려면
왼쪽에서 경로 복사 할 수 있음.
/content/sample_data/california_housing_test.csv
엑셀파일 확장자 명은 .csv 아니면 .xlxs
그걸 로드(load)할 수 있음.
import pandas as pd
#파일의 주소를 string으로서 불러온다.
train = pd.read_csv("/content/sample_data/california_housing_train.csv")
test = pd.read_csv("/content/sample_data/california_housing_test.csv")
train
# 8 feature 있다. 각각의 question과 answer인 median_house_value가 있다.
# regression problem이다.
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | -114.31 | 34.19 | 15.0 | 5612.0 | 1283.0 | 1015.0 | 472.0 | 1.4936 | 66900.0 |
| 1 | -114.47 | 34.40 | 19.0 | 7650.0 | 1901.0 | 1129.0 | 463.0 | 1.8200 | 80100.0 |
| 2 | -114.56 | 33.69 | 17.0 | 720.0 | 174.0 | 333.0 | 117.0 | 1.6509 | 85700.0 |
| 3 | -114.57 | 33.64 | 14.0 | 1501.0 | 337.0 | 515.0 | 226.0 | 3.1917 | 73400.0 |
| 4 | -114.57 | 33.57 | 20.0 | 1454.0 | 326.0 | 624.0 | 262.0 | 1.9250 | 65500.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16995 | -124.26 | 40.58 | 52.0 | 2217.0 | 394.0 | 907.0 | 369.0 | 2.3571 | 111400.0 |
| 16996 | -124.27 | 40.69 | 36.0 | 2349.0 | 528.0 | 1194.0 | 465.0 | 2.5179 | 79000.0 |
| 16997 | -124.30 | 41.84 | 17.0 | 2677.0 | 531.0 | 1244.0 | 456.0 | 3.0313 | 103600.0 |
| 16998 | -124.30 | 41.80 | 19.0 | 2672.0 | 552.0 | 1298.0 | 478.0 | 1.9797 | 85800.0 |
| 16999 | -124.35 | 40.54 | 52.0 | 1820.0 | 300.0 | 806.0 | 270.0 | 3.0147 | 94600.0 |
17000 rows × 9 columns
type(train)
pandas.core.frame.DataFrame
pandas에는 2 data type 있다.
series : 1D : only 1 feature
data frame : 2D
train.shape
(17000, 9)
train.head()
# top 5 data만 보여줌. default=5
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | -114.31 | 34.19 | 15.0 | 5612.0 | 1283.0 | 1015.0 | 472.0 | 1.4936 | 66900.0 |
| 1 | -114.47 | 34.40 | 19.0 | 7650.0 | 1901.0 | 1129.0 | 463.0 | 1.8200 | 80100.0 |
| 2 | -114.56 | 33.69 | 17.0 | 720.0 | 174.0 | 333.0 | 117.0 | 1.6509 | 85700.0 |
| 3 | -114.57 | 33.64 | 14.0 | 1501.0 | 337.0 | 515.0 | 226.0 | 3.1917 | 73400.0 |
| 4 | -114.57 | 33.57 | 20.0 | 1454.0 | 326.0 | 624.0 | 262.0 | 1.9250 | 65500.0 |
train.head(10)
# top 10 data만 보여줌.
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | -114.31 | 34.19 | 15.0 | 5612.0 | 1283.0 | 1015.0 | 472.0 | 1.4936 | 66900.0 |
| 1 | -114.47 | 34.40 | 19.0 | 7650.0 | 1901.0 | 1129.0 | 463.0 | 1.8200 | 80100.0 |
| 2 | -114.56 | 33.69 | 17.0 | 720.0 | 174.0 | 333.0 | 117.0 | 1.6509 | 85700.0 |
| 3 | -114.57 | 33.64 | 14.0 | 1501.0 | 337.0 | 515.0 | 226.0 | 3.1917 | 73400.0 |
| 4 | -114.57 | 33.57 | 20.0 | 1454.0 | 326.0 | 624.0 | 262.0 | 1.9250 | 65500.0 |
| 5 | -114.58 | 33.63 | 29.0 | 1387.0 | 236.0 | 671.0 | 239.0 | 3.3438 | 74000.0 |
| 6 | -114.58 | 33.61 | 25.0 | 2907.0 | 680.0 | 1841.0 | 633.0 | 2.6768 | 82400.0 |
| 7 | -114.59 | 34.83 | 41.0 | 812.0 | 168.0 | 375.0 | 158.0 | 1.7083 | 48500.0 |
| 8 | -114.59 | 33.61 | 34.0 | 4789.0 | 1175.0 | 3134.0 | 1056.0 | 2.1782 | 58400.0 |
| 9 | -114.60 | 34.83 | 46.0 | 1497.0 | 309.0 | 787.0 | 271.0 | 2.1908 | 48100.0 |
train.tail()
# 하위 5개만 보여줌. default=5
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 16995 | -124.26 | 40.58 | 52.0 | 2217.0 | 394.0 | 907.0 | 369.0 | 2.3571 | 111400.0 |
| 16996 | -124.27 | 40.69 | 36.0 | 2349.0 | 528.0 | 1194.0 | 465.0 | 2.5179 | 79000.0 |
| 16997 | -124.30 | 41.84 | 17.0 | 2677.0 | 531.0 | 1244.0 | 456.0 | 3.0313 | 103600.0 |
| 16998 | -124.30 | 41.80 | 19.0 | 2672.0 | 552.0 | 1298.0 | 478.0 | 1.9797 | 85800.0 |
| 16999 | -124.35 | 40.54 | 52.0 | 1820.0 | 300.0 | 806.0 | 270.0 | 3.0147 | 94600.0 |
train.tail(3)
# 하위 3개만 보여줌.
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 16997 | -124.30 | 41.84 | 17.0 | 2677.0 | 531.0 | 1244.0 | 456.0 | 3.0313 | 103600.0 |
| 16998 | -124.30 | 41.80 | 19.0 | 2672.0 | 552.0 | 1298.0 | 478.0 | 1.9797 | 85800.0 |
| 16999 | -124.35 | 40.54 | 52.0 | 1820.0 | 300.0 | 806.0 | 270.0 | 3.0147 | 94600.0 |
test.shape
(3000, 9)
기존 train 17000, 기존 test 3000개 합쳐서
20000개 만들고
그걸 다시 train 80퍼, test 20퍼로 나누고 싶다.
train = train.append(test)
train.shape
(20000, 9)
train.info()
# non null count는 missing data (??????/다시듣기 23분)
<class 'pandas.core.frame.DataFrame'> Int64Index: 20000 entries, 0 to 2999 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 longitude 20000 non-null float64 1 latitude 20000 non-null float64 2 housing_median_age 20000 non-null float64 3 total_rooms 20000 non-null float64 4 total_bedrooms 20000 non-null float64 5 population 20000 non-null float64 6 households 20000 non-null float64 7 median_income 20000 non-null float64 8 median_house_value 20000 non-null float64 dtypes: float64(9) memory usage: 1.5 MB
When you import the excel file data, you will check data size and type, ...
Then, you should check whether any missing data is in the data sheet.
사실 이경우는 null인 data가 없긴 함.
train.isnull()
#이거 했는데 true 나오면 missing되어있단거임.
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | False | False | False | False | False | False | False | False | False |
| 1 | False | False | False | False | False | False | False | False | False |
| 2 | False | False | False | False | False | False | False | False | False |
| 3 | False | False | False | False | False | False | False | False | False |
| 4 | False | False | False | False | False | False | False | False | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2995 | False | False | False | False | False | False | False | False | False |
| 2996 | False | False | False | False | False | False | False | False | False |
| 2997 | False | False | False | False | False | False | False | False | False |
| 2998 | False | False | False | False | False | False | False | False | False |
| 2999 | False | False | False | False | False | False | False | False | False |
20000 rows × 9 columns
train.isnull().sum()
#false는 0, true는1이니까 이걸 더해서 알려줌.
longitude 0 latitude 0 housing_median_age 0 total_rooms 0 total_bedrooms 0 population 0 households 0 median_income 0 median_house_value 0 dtype: int64
NaN (missing data)는 가끔 infinite data로 간주됨.
model 형성 시 안좋음.
그럼 그 whole 없애버려야 함, 혹은 manually put the number.
train.head(3)
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | -114.31 | 34.19 | 15.0 | 5612.0 | 1283.0 | 1015.0 | 472.0 | 1.4936 | 66900.0 |
| 1 | -114.47 | 34.40 | 19.0 | 7650.0 | 1901.0 | 1129.0 | 463.0 | 1.8200 | 80100.0 |
| 2 | -114.56 | 33.69 | 17.0 | 720.0 | 174.0 | 333.0 | 117.0 | 1.6509 | 85700.0 |
train.population #방법1
0 1015.0
1 1129.0
2 333.0
3 515.0
4 624.0
...
2995 1258.0
2996 3496.0
2997 693.0
2998 46.0
2999 753.0
Name: population, Length: 20000, dtype: float64
train['population'] #방법2
0 1015.0
1 1129.0
2 333.0
3 515.0
4 624.0
...
2995 1258.0
2996 3496.0
2997 693.0
2998 46.0
2999 753.0
Name: population, Length: 20000, dtype: float64
train.shape
(20000, 9)
train (20000) should be split into train and test data.
근데 그러기 전에! feature와 label을 구분(seperate) 해줘야 함.
drop은 remove하는 것임.
3D 인 데이터는 axis 3개임. axis0, axis1, axis2
feature : X (대문자)
output : y (소문자)
y = train.median_house_value
X = train.drop(['median_house_value'], axis=1)
# 하면 ['median_house_value']에 대한 게 싹 다 날라감.
X.head() #Question
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | |
|---|---|---|---|---|---|---|---|---|
| 0 | -114.31 | 34.19 | 15.0 | 5612.0 | 1283.0 | 1015.0 | 472.0 | 1.4936 |
| 1 | -114.47 | 34.40 | 19.0 | 7650.0 | 1901.0 | 1129.0 | 463.0 | 1.8200 |
| 2 | -114.56 | 33.69 | 17.0 | 720.0 | 174.0 | 333.0 | 117.0 | 1.6509 |
| 3 | -114.57 | 33.64 | 14.0 | 1501.0 | 337.0 | 515.0 | 226.0 | 3.1917 |
| 4 | -114.57 | 33.57 | 20.0 | 1454.0 | 326.0 | 624.0 | 262.0 | 1.9250 |
X.shape
(20000, 8)
y #Answer
0 66900.0
1 80100.0
2 85700.0
3 73400.0
4 65500.0
...
2995 225000.0
2996 237200.0
2997 62000.0
2998 162500.0
2999 500001.0
Name: median_house_value, Length: 20000, dtype: float64
y.shape
(20000,)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
# Q(feature) , A(output, label)
X_train.shape
(16000, 8)
X_test.shape
(4000, 8)
y_train.shape #1D임.
(16000,)
y_test.shape
(4000,)
Preprocessing
X_train.head(10)
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | |
|---|---|---|---|---|---|---|---|---|
| 7013 | -118.31 | 33.88 | 32.0 | 2421.0 | 671.0 | 1491.0 | 587.0 | 3.5644 |
| 7766 | -118.38 | 33.81 | 20.0 | 1975.0 | 306.0 | 703.0 | 292.0 | 8.5491 |
| 5910 | -118.20 | 33.77 | 40.0 | 2034.0 | 899.0 | 1257.0 | 797.0 | 1.2864 |
| 7020 | -118.31 | 33.82 | 39.0 | 2198.0 | 425.0 | 1160.0 | 436.0 | 4.1406 |
| 16952 | -124.15 | 40.80 | 47.0 | 1486.0 | 335.0 | 765.0 | 329.0 | 1.7550 |
| 9499 | -119.29 | 36.33 | 19.0 | 792.0 | 232.0 | 641.0 | 222.0 | 0.7445 |
| 3660 | -117.92 | 33.83 | 36.0 | 1072.0 | 193.0 | 639.0 | 196.0 | 5.0275 |
| 10548 | -120.44 | 34.93 | 15.0 | 868.0 | 244.0 | 1133.0 | 253.0 | 2.0995 |
| 9660 | -119.52 | 34.41 | 20.0 | 4489.0 | 800.0 | 2867.0 | 765.0 | 4.8060 |
| 4797 | -118.09 | 34.06 | 31.0 | 1146.0 | 289.0 | 1163.0 | 258.0 | 2.2083 |
logistic regression은
H = w1x1 + w2x2 + w3x3 + ... + w8x8 + b
에서 w1, w2, ..., w8을 찾는 거였음. optimal combination of weight value.
근데, longitude는 - value이다.
latitude는 약 30 등
각 feature마다 range 너무 다양.. scale 다양
scale 큰 feature(ex. X4)가 affects the result dominantly.
그리고 만약 X4에 에러가 10퍼라면, 즉 5000인 데이터의 에러가 500인데, 따른 feature들보다도 큼.
이건 불공평해!!!!! unfair.
때문에 nomalization이 필요하다.
mean value가 0이고 표준편차(stand deviation)이 1이 되도록 정규분포 만들거다.
from sklearn.preprocessing import StandardScaler #얜 class다.
scaler = StandardScaler() #instance 지정
a = scaler.fit(X_train) #calculate the mean and standard deviation.
X_train = a.transform(X_train)
#혹은
X_train = scaler.fit_transform(X_train)
X_train #일케 numpy로 바뀜. ,X_train.head(10) 하면 에러
#이제 fair하다
array([[ 0.63342424, -0.82381491, 0.26407958, ..., 0.05895535,
0.22970607, -0.16426926],
[ 0.59856624, -0.856563 , -0.68841666, ..., -0.63437891,
-0.54590604, 2.45500955],
[ 0.68820108, -0.8752762 , 0.89907707, ..., -0.14693325,
0.78183672, -1.36127552],
...,
[ 0.72305908, -0.83317151, 0.66095301, ..., -0.21204332,
-0.22777361, 0.6849326 ],
[-0.93020583, 1.34691604, 0.66095301, ..., -0.47160373,
-0.49332217, -0.69246535],
[ 1.53973211, -0.8612413 , -1.24403946, ..., -0.13197553,
0.49525462, -0.07220802]])
X_train[0]
array([ 0.63342424, -0.82381491, 0.26407958, -0.09720812, 0.31761172,
0.05895535, 0.22970607, -0.16426926])
"""
from sklearn.linear_model import LogisticRegression
model = LogisticRegression( )
model.fit(X_train, y_train)
"""
/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:818: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,
LogisticRegression()
from sklearn.linear_model import LinearRegression
model = LinearRegression( )
model.fit(X_train, y_train)
LinearRegression()
prediction = model.predict(X_test)
sum(abs(prediction - y_test))/len(y_test) #이렇게 하면 안된다. test data도 mean, sd 바꿔야 댐.
3904772.310203393
우리는 train data에서 each feature마다 mean과 standard deviation 계산해주었다.
(X-mean)/std로 각각이 대체되었다.
test data에서도 똑같이 하려면 문제가 있다.
왜냐면 test data는 should not be involved. should be isolated for taining.
scaler = StandardScaler() 에서,
sclaer는 training data에 대한 mean, std 있는거다.
X_test = scaler.transform(X_test)
prediction = model.predict(X_test)
sum(abs(prediction - y_test))/len(y_test)
/usr/local/lib/python3.7/dist-packages/sklearn/base.py:446: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names "X does not have valid feature names, but"
5913596.858557027
data1 = pd.read_excel("/content/diabetes_example.xlsx")
data1
| height | weight | gender | age | high pressure | low pressure | comorbidity | diabetes | |
|---|---|---|---|---|---|---|---|---|
| 0 | 150 | 50.0 | M | 10.0 | 100 | 50 | stroke | normal |
| 1 | 160 | 60.0 | M | NaN | 110 | 60 | asthma | normal |
| 2 | 170 | 70.0 | F | 30.0 | 120 | 70 | none | diabetes |
| 3 | 180 | 80.0 | F | 40.0 | 130 | 80 | hypertension | diabetes |
| 4 | 190 | 90.0 | M | 50.0 | 140 | 90 | sleep apnea | normal |
| 5 | 200 | NaN | M | 60.0 | 150 | 100 | none | normal |
여기엔 missing value 있다. infinite number로 간주된다.
data1.isnull() #하면 missing value를 True인걸로 확인 가능.
| height | weight | gender | age | high pressure | low pressure | comorbidity | diabetes | |
|---|---|---|---|---|---|---|---|---|
| 0 | False | False | False | False | False | False | False | False |
| 1 | False | False | False | True | False | False | False | False |
| 2 | False | False | False | False | False | False | False | False |
| 3 | False | False | False | False | False | False | False | False |
| 4 | False | False | False | False | False | False | False | False |
| 5 | False | True | False | False | False | False | False | False |
data1.isnull().sum() #하면 각 행마다 몇갠지 세줌.
height 0 weight 1 gender 0 age 1 high pressure 0 low pressure 0 comorbidity 0 diabetes 0 dtype: int64
위에서 1열과 5열을 없애주고 싶다.
data1 = data1.dropna(axis=0) #axis0은 세로축을 의미
data1
| height | weight | gender | age | high pressure | low pressure | comorbidity | diabetes | |
|---|---|---|---|---|---|---|---|---|
| 0 | 150 | 50.0 | M | 10.0 | 100 | 50 | stroke | normal |
| 2 | 170 | 70.0 | F | 30.0 | 120 | 70 | none | diabetes |
| 3 | 180 | 80.0 | F | 40.0 | 130 | 80 | hypertension | diabetes |
| 4 | 190 | 90.0 | M | 50.0 | 140 | 90 | sleep apnea | normal |
data1 = data1.dropna(axis=1) #axis1은 가로축을 의미 age 통째로, weight 통째로 없앰.
data1
| height | weight | gender | age | high pressure | low pressure | comorbidity | diabetes | |
|---|---|---|---|---|---|---|---|---|
| 0 | 150 | 50.0 | M | 10.0 | 100 | 50 | stroke | normal |
| 2 | 170 | 70.0 | F | 30.0 | 120 | 70 | none | diabetes |
| 3 | 180 | 80.0 | F | 40.0 | 130 | 80 | hypertension | diabetes |
| 4 | 190 | 90.0 | M | 50.0 | 140 | 90 | sleep apnea | normal |
male은 0, F는 1로 하고 싶다.
normal은 0, diabetes는 1로 하고 싶다.
mapping = {'M':0, 'F':1}
data1['gender'] = data1['gender'].map(mapping)
data1
| height | weight | gender | age | high pressure | low pressure | comorbidity | diabetes | |
|---|---|---|---|---|---|---|---|---|
| 0 | 150 | 50.0 | 0 | 10.0 | 100 | 50 | stroke | normal |
| 2 | 170 | 70.0 | 1 | 30.0 | 120 | 70 | none | diabetes |
| 3 | 180 | 80.0 | 1 | 40.0 | 130 | 80 | hypertension | diabetes |
| 4 | 190 | 90.0 | 0 | 50.0 | 140 | 90 | sleep apnea | normal |
mapping = {'normal':0, 'diabetes':1}
data1['diabetes'] = data1['diabetes'].map(mapping)
data1
| height | weight | gender | age | high pressure | low pressure | comorbidity | diabetes | |
|---|---|---|---|---|---|---|---|---|
| 0 | 150 | 50.0 | 0 | 10.0 | 100 | 50 | stroke | 0 |
| 2 | 170 | 70.0 | 1 | 30.0 | 120 | 70 | none | 1 |
| 3 | 180 | 80.0 | 1 | 40.0 | 130 | 80 | hypertension | 1 |
| 4 | 190 | 90.0 | 0 | 50.0 | 140 | 90 | sleep apnea | 0 |
#comorbidity_mapping = {'stroke':0, 'asthma':1, 'none':2, 'hypertension':3, 'sleep apnea':4}
#data1['diabetes'] = data1['cormobidity'].map(comorbidity_mapping)
#data1
#이렇게 하면 안됨!!!!!
One Hot Encoding 해줘야 함.
다음시간에 MNIST (0~9의 10class의 handwritten image) 가지고 DL 실습 해볼거임.
이런 게 one hot encoding임.
0~9는 human based이였는데
one hot encoding 하면 computer based로 바꿈.
5종류의 병명(stroke, asthma, none, hypertension, sleep apnea) 있으니까
comorbidity = pd.get_dummies(data1['comorbidity'])
comorbidity
| hypertension | none | sleep apnea | stroke | |
|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 1 |
| 2 | 0 | 1 | 0 | 0 |
| 3 | 1 | 0 | 0 | 0 |
| 4 | 0 | 0 | 1 | 0 |
pd.concat([data1, comorbidity], axis=1)
data1
| height | weight | gender | age | high pressure | low pressure | comorbidity | diabetes | |
|---|---|---|---|---|---|---|---|---|
| 0 | 150 | 50.0 | 0 | 10.0 | 100 | 50 | stroke | 0 |
| 2 | 170 | 70.0 | 1 | 30.0 | 120 | 70 | none | 1 |
| 3 | 180 | 80.0 | 1 | 40.0 | 130 | 80 | hypertension | 1 |
| 4 | 190 | 90.0 | 0 | 50.0 | 140 | 90 | sleep apnea | 0 |
이제 기존 comorbidity column 없애주자.
data1 = data1.drop(['comorbidity'], axis=1)
data1
| height | weight | gender | age | high pressure | low pressure | diabetes | |
|---|---|---|---|---|---|---|---|
| 0 | 150 | 50.0 | 0 | 10.0 | 100 | 50 | 0 |
| 2 | 170 | 70.0 | 1 | 30.0 | 120 | 70 | 1 |
| 3 | 180 | 80.0 | 1 | 40.0 | 130 | 80 | 1 |
| 4 | 190 | 90.0 | 0 | 50.0 | 140 | 90 | 0 |
By using one hot encoding, the number of features increase.
이게 ML의 마지막 class임. 이제 DL 배울거임.
linear regression -> regression problem
logistic regression -> classification problem
SVM, DecisionTree, RF, XGB -> both
import pandas as pd
train = pd.read_csv("/content/sample_data/california_housing_train.csv")
test = pd.read_csv("/content/sample_data/california_housing_test.csv")
train.head()
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | -114.31 | 34.19 | 15.0 | 5612.0 | 1283.0 | 1015.0 | 472.0 | 1.4936 | 66900.0 |
| 1 | -114.47 | 34.40 | 19.0 | 7650.0 | 1901.0 | 1129.0 | 463.0 | 1.8200 | 80100.0 |
| 2 | -114.56 | 33.69 | 17.0 | 720.0 | 174.0 | 333.0 | 117.0 | 1.6509 | 85700.0 |
| 3 | -114.57 | 33.64 | 14.0 | 1501.0 | 337.0 | 515.0 | 226.0 | 3.1917 | 73400.0 |
| 4 | -114.57 | 33.57 | 20.0 | 1454.0 | 326.0 | 624.0 | 262.0 | 1.9250 | 65500.0 |
train.tail()
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 16995 | -124.26 | 40.58 | 52.0 | 2217.0 | 394.0 | 907.0 | 369.0 | 2.3571 | 111400.0 |
| 16996 | -124.27 | 40.69 | 36.0 | 2349.0 | 528.0 | 1194.0 | 465.0 | 2.5179 | 79000.0 |
| 16997 | -124.30 | 41.84 | 17.0 | 2677.0 | 531.0 | 1244.0 | 456.0 | 3.0313 | 103600.0 |
| 16998 | -124.30 | 41.80 | 19.0 | 2672.0 | 552.0 | 1298.0 | 478.0 | 1.9797 | 85800.0 |
| 16999 | -124.35 | 40.54 | 52.0 | 1820.0 | 300.0 | 806.0 | 270.0 | 3.0147 | 94600.0 |
train.shape #9feature
(17000, 9)
type(train)
pandas.core.frame.DataFrame
Pd로 1D(series), 2D(dataframe), 3D 등 할 수 있음.
test.head(3)
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | -122.05 | 37.37 | 27.0 | 3885.0 | 661.0 | 1537.0 | 606.0 | 6.6085 | 344700.0 |
| 1 | -118.30 | 34.26 | 43.0 | 1510.0 | 310.0 | 809.0 | 277.0 | 3.5990 | 176500.0 |
| 2 | -117.81 | 33.78 | 27.0 | 3589.0 | 507.0 | 1484.0 | 495.0 | 5.7934 | 270500.0 |
test.tail(10)
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 2990 | -118.23 | 34.09 | 49.0 | 1638.0 | 456.0 | 1500.0 | 430.0 | 2.6923 | 150000.0 |
| 2991 | -117.17 | 34.28 | 13.0 | 4867.0 | 718.0 | 780.0 | 250.0 | 7.1997 | 253800.0 |
| 2992 | -122.33 | 37.39 | 52.0 | 573.0 | 102.0 | 232.0 | 92.0 | 6.2263 | 500001.0 |
| 2993 | -117.91 | 33.60 | 37.0 | 2088.0 | 510.0 | 673.0 | 390.0 | 5.1048 | 500001.0 |
| 2994 | -117.93 | 33.86 | 35.0 | 931.0 | 181.0 | 516.0 | 174.0 | 5.5867 | 182500.0 |
| 2995 | -119.86 | 34.42 | 23.0 | 1450.0 | 642.0 | 1258.0 | 607.0 | 1.1790 | 225000.0 |
| 2996 | -118.14 | 34.06 | 27.0 | 5257.0 | 1082.0 | 3496.0 | 1036.0 | 3.3906 | 237200.0 |
| 2997 | -119.70 | 36.30 | 10.0 | 956.0 | 201.0 | 693.0 | 220.0 | 2.2895 | 62000.0 |
| 2998 | -117.12 | 34.10 | 40.0 | 96.0 | 14.0 | 46.0 | 14.0 | 3.2708 | 162500.0 |
| 2999 | -119.63 | 34.42 | 42.0 | 1765.0 | 263.0 | 753.0 | 260.0 | 8.5608 | 500001.0 |
test.shape
(3000, 9)
type(test)
pandas.core.frame.DataFrame
합쳐서 다시 나눠보자
train = train.append(test)
train.shape
(20000, 9)
train.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 20000 entries, 0 to 2999 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 longitude 20000 non-null float64 1 latitude 20000 non-null float64 2 housing_median_age 20000 non-null float64 3 total_rooms 20000 non-null float64 4 total_bedrooms 20000 non-null float64 5 population 20000 non-null float64 6 households 20000 non-null float64 7 median_income 20000 non-null float64 8 median_house_value 20000 non-null float64 dtypes: float64(9) memory usage: 1.5 MB
train.isnull() #missing있는지 물어보는거
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | False | False | False | False | False | False | False | False | False |
| 1 | False | False | False | False | False | False | False | False | False |
| 2 | False | False | False | False | False | False | False | False | False |
| 3 | False | False | False | False | False | False | False | False | False |
| 4 | False | False | False | False | False | False | False | False | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2995 | False | False | False | False | False | False | False | False | False |
| 2996 | False | False | False | False | False | False | False | False | False |
| 2997 | False | False | False | False | False | False | False | False | False |
| 2998 | False | False | False | False | False | False | False | False | False |
| 2999 | False | False | False | False | False | False | False | False | False |
20000 rows × 9 columns
train.isnull().sum()
longitude 0 latitude 0 housing_median_age 0 total_rooms 0 total_bedrooms 0 population 0 households 0 median_income 0 median_house_value 0 dtype: int64
train.head()
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | -114.31 | 34.19 | 15.0 | 5612.0 | 1283.0 | 1015.0 | 472.0 | 1.4936 | 66900.0 |
| 1 | -114.47 | 34.40 | 19.0 | 7650.0 | 1901.0 | 1129.0 | 463.0 | 1.8200 | 80100.0 |
| 2 | -114.56 | 33.69 | 17.0 | 720.0 | 174.0 | 333.0 | 117.0 | 1.6509 | 85700.0 |
| 3 | -114.57 | 33.64 | 14.0 | 1501.0 | 337.0 | 515.0 | 226.0 | 3.1917 | 73400.0 |
| 4 | -114.57 | 33.57 | 20.0 | 1454.0 | 326.0 | 624.0 | 262.0 | 1.9250 | 65500.0 |
train.total_rooms #total_rooms라는 column(feature)의 값들 다 보기
0 5612.0
1 7650.0
2 720.0
3 1501.0
4 1454.0
...
2995 1450.0
2996 5257.0
2997 956.0
2998 96.0
2999 1765.0
Name: total_rooms, Length: 20000, dtype: float64
train['total_rooms'] #같음
0 5612.0
1 7650.0
2 720.0
3 1501.0
4 1454.0
...
2995 1450.0
2996 5257.0
2997 956.0
2998 96.0
2999 1765.0
Name: total_rooms, Length: 20000, dtype: float64
y : median_house_value는 answer(label, output)로, 분리(seperate)를 해주고 싶다. 주로 single value.
X : 나머지 여러개들은 feature. questions. 주로 vector
y = train.median_house_value
y
0 66900.0
1 80100.0
2 85700.0
3 73400.0
4 65500.0
...
2995 225000.0
2996 237200.0
2997 62000.0
2998 162500.0
2999 500001.0
Name: median_house_value, Length: 20000, dtype: float64
type(y) #1D이다. 그래서 Series로 나옴.
pandas.core.series.Series
X = train.drop(['median_house_value'], axis=1)
X
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | |
|---|---|---|---|---|---|---|---|---|
| 0 | -114.31 | 34.19 | 15.0 | 5612.0 | 1283.0 | 1015.0 | 472.0 | 1.4936 |
| 1 | -114.47 | 34.40 | 19.0 | 7650.0 | 1901.0 | 1129.0 | 463.0 | 1.8200 |
| 2 | -114.56 | 33.69 | 17.0 | 720.0 | 174.0 | 333.0 | 117.0 | 1.6509 |
| 3 | -114.57 | 33.64 | 14.0 | 1501.0 | 337.0 | 515.0 | 226.0 | 3.1917 |
| 4 | -114.57 | 33.57 | 20.0 | 1454.0 | 326.0 | 624.0 | 262.0 | 1.9250 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2995 | -119.86 | 34.42 | 23.0 | 1450.0 | 642.0 | 1258.0 | 607.0 | 1.1790 |
| 2996 | -118.14 | 34.06 | 27.0 | 5257.0 | 1082.0 | 3496.0 | 1036.0 | 3.3906 |
| 2997 | -119.70 | 36.30 | 10.0 | 956.0 | 201.0 | 693.0 | 220.0 | 2.2895 |
| 2998 | -117.12 | 34.10 | 40.0 | 96.0 | 14.0 | 46.0 | 14.0 | 3.2708 |
| 2999 | -119.63 | 34.42 | 42.0 | 1765.0 | 263.0 | 753.0 | 260.0 | 8.5608 |
20000 rows × 8 columns
X.shape
(20000, 8)
y.shape
(20000,)
이제 data split 하자
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=1)
# 위 두 줄은 외우는 걸 추천.
이제 Nomarlization 하자
from sklearn.preprocessing import StandardScaler #이건 class다.
scaler = StandardScaler() #instance만들기
scaler.fit(X_train) #평균, 표준편차 계산 each column 대해.
X_train = scaler.transform(X_train) #value 바꾸기
X_train = scaler.fit_transform(X_train) #로 한줄로도 가능.
X_test = scaler.fit_transform(X_test) #로 하면 안돼!
#scaler에 train data에 맞게 평균, 표준편차가 계산되었기 때문입니다
X_test = scaler.transform(X_test) #로 해야함.
/usr/local/lib/python3.7/dist-packages/sklearn/base.py:446: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names "X does not have valid feature names, but"
Nomarlization 방법2 (뭐가 더 낫다는 아님. 걍 옵션)
from sklearn.preprocessing import MinMaxScaler #이건 another nomalization method
#maximum value로 모든 value 나눠서 맥스가 1 되도록 함.
#(X-Min) / (Max-Min)
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
이제 learning시키자.
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)
LinearRegression()
prediction = model.predict(X_test)
sum(abs(prediction - y_test)) / len(y_test)
3904772.310203393
from sklearn.svm import SVR
model = SVR()
model.fit(X_train, y_train)
prediction = model.predict(X_test)
sum(abs(prediction - y_test)) / len(y_test)
86615.37364723098
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor()
model.fit(X_train, y_train)
prediction = model.predict(X_test)
sum(abs(prediction - y_test)) / len(y_test)
120058.69675
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
model.fit(X_train, y_train)
prediction = model.predict(X_test)
sum(abs(prediction - y_test)) / len(y_test)
93964.96685500002
from xgboost import XGBRegressor
model = XGBRegressor()
model.fit(X_train, y_train)
prediction = model.predict(X_test)
sum(abs(prediction - y_test)) / len(y_test)
[00:32:28] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.
97030.82971679687
from sklearn.neighbors import KNeighborsRegressor
model = KNeighborsRegressor()
model.fit(X_train, y_train)
prediction = model.predict(X_test)
sum(abs(prediction - y_test)) / len(y_test)
158361.77175
from tensorflow import keras
keras는 tensorflow 안에 있다.
tensorflow는 구글에서 개발
구글이 keras를 accept함.
TF 안에 keras 있음.
data= cancer['data']에서 'data'는 feature value
data = cancer['target'] 에서 'target'은 lable data
# Import Library & Load data & Data split
from sklearn.datasets import load_breast_cancer #30 features니까
from sklearn.model_selection import train_test_split
from tensorflow import keras
cancer = load_breast_cancer()
data = cancer['data']
target = cancer['target']
train_data, test_data, train_label, test_label = train_test_split(data, target, test_size = 0.2, random_state = 1)
# Deep Nural Network Model
model = keras.Sequential()
model.add(keras.layers.Dense(10,activation = 'relu', input_shape=(30,)))
#input layer에 30node(neurons)있음.
#Dense는 fully connected layer = Dense layer
# input_shape=(30,) 는 first layer after input layer 의 shape
model.add(keras.layers.Dense(1,activation='sigmoid'))
#output layer엔 0(malignant), or 1(B).
#binary classfication이므로 1 node.
#막 -600 -30 40 100 등 다양하게 나오는 값을 확률로 바꾸기 위해 sigmoid로 activation function 씀.
#마이너스무한대~무한대 값이 0~1 중 하나로 되는 게 sigmoid.
#혹은 이렇게 레이어 정보를 리스트로 넣어줘도 됨.
model = keras.Sequential([keras.layers.Dense(10,activation = 'relu', input_shape=(30,)),
keras.layers.Dense(1,activation='sigmoid') ])
#이렇게 간단하게 임포트 해두 된다!
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
model = Sequential()
model.add(Dense(10,activation = 'relu', input_shape=(30,)))
model.add(Dense(1,activation='sigmoid'))
keras는 Sequential이란 class 있고, model이란 변수에 instance 부여.
keras.layers.Dense(10, )는 1layer
지금은 2 layer 있다. (30 / 10 / 1), 310 weight values.
10의 의미 : 10node 있다는 뜻.
load_breast_cancer는 30feature 있다. 즉 30개의 input
each node has value.
input layer (안에 30 node, 각 노드는 value 가짐)
output layer는 1 node로, 0~1의 값. (M:0, B:1)
0.5보다 크면 B, 0.5보다 작으면 M
each node 는 다음꺼에 connect 됨.
다음 layer는 10node, 총 300개의 connection.
각 connection은 weight value 있다.
즉, w1~w300 가 있는 거다.
input layer의 각 노드 이름 x1, x2, .., x30
두번째 layer의 첫 노드는 w1x1 + w2x2 + ... + w30x30 + b1임.
두번째 layer의 두번째 노드는 w31x1 + w32x2 + ... + w60x30 + b2임.
세번째 layer(output layer)은 w301 ~ w310
즉 이거 전체 토탈 총 weight는 310개임.
bias는 총 10 + 1 해서 11개임.
value는 310 weight + 11 bias 해서 총 312개임.
#How to train the model?
model.compile(loss='BinaryCrossentropy', optimizer='adam', metrics=['accuracy'])
loss = 'BinaryCrossentropy' 는 cost라는 함수이고, error 찾는 거임.
우리는 train as the error decrease 해야함.
error 즉 loss 즉 cost는 decrease 되어야 함.
loss function이 'BinaryCrossentropy'임.
regression problem
learning rate인 알파가 클 수록 더 많이 w가 팍팍 바뀜.
Classification
loss = 'MeanSquaredError' 로 할수도 있지만 에러가 안준다. accuracy는 안는다.
loss='BinaryCrossentropy' 로 해줘야 함.
#Train the model
model.fit(train_data, train_label, batch_size=16, epochs=20)
Epoch 1/20 29/29 [==============================] - 2s 4ms/step - loss: 81.8523 - accuracy: 0.6264 Epoch 2/20 29/29 [==============================] - 0s 4ms/step - loss: 17.9834 - accuracy: 0.3451 Epoch 3/20 29/29 [==============================] - 0s 4ms/step - loss: 8.3961 - accuracy: 0.1297 Epoch 4/20 29/29 [==============================] - 0s 4ms/step - loss: 3.9759 - accuracy: 0.2637 Epoch 5/20 29/29 [==============================] - 0s 4ms/step - loss: 1.3054 - accuracy: 0.5121 Epoch 6/20 29/29 [==============================] - 0s 4ms/step - loss: 0.4725 - accuracy: 0.7846 Epoch 7/20 29/29 [==============================] - 0s 4ms/step - loss: 0.3794 - accuracy: 0.8527 Epoch 8/20 29/29 [==============================] - 0s 4ms/step - loss: 0.4338 - accuracy: 0.8396 Epoch 9/20 29/29 [==============================] - 0s 4ms/step - loss: 0.3151 - accuracy: 0.8923 Epoch 10/20 29/29 [==============================] - 0s 4ms/step - loss: 0.3053 - accuracy: 0.8769 Epoch 11/20 29/29 [==============================] - 0s 4ms/step - loss: 0.2778 - accuracy: 0.9011 Epoch 12/20 29/29 [==============================] - 0s 4ms/step - loss: 0.2736 - accuracy: 0.8923 Epoch 13/20 29/29 [==============================] - 0s 4ms/step - loss: 0.3143 - accuracy: 0.8835 Epoch 14/20 29/29 [==============================] - 0s 4ms/step - loss: 0.2774 - accuracy: 0.8989 Epoch 15/20 29/29 [==============================] - 0s 4ms/step - loss: 0.3136 - accuracy: 0.8945 Epoch 16/20 29/29 [==============================] - 0s 4ms/step - loss: 0.2361 - accuracy: 0.9077 Epoch 17/20 29/29 [==============================] - 0s 4ms/step - loss: 0.2655 - accuracy: 0.8989 Epoch 18/20 29/29 [==============================] - 0s 4ms/step - loss: 0.3042 - accuracy: 0.8923 Epoch 19/20 29/29 [==============================] - 0s 4ms/step - loss: 0.2895 - accuracy: 0.8769 Epoch 20/20 29/29 [==============================] - 0s 4ms/step - loss: 0.2444 - accuracy: 0.9077
<keras.callbacks.History at 0x7fbfc387cc10>
train_data.shape
(455, 30)
#Evaluate
(loss,acc) = model.evaluate(test_data, test_label)
4/4 [==============================] - 0s 4ms/step - loss: 0.2968 - accuracy: 0.8860
optimizer
우리는 model을 gradient descent method로 할 거임.
Gradient Descent Method는
만약 loss function이 있다면, 모든 possible value 에 대해 plot number 할 수 있고(U자), 가장 작은 게 만약 weight가 4라면, lowest weight value인 것임.
만약 million weight value가 있다면,
1000개
million x 1000 = billion 개 되는거임.
이렇게 연산량 넘 많아지기에 최적값 찾을 수 없음. 그래서 Gradient descent method 필요함.
1) assign w value randomly.
2) 그 점에서 미분(derivate) 하자. 그럼 순간기울기 slope 얻을 수 있다. 그게 negative면 w value를 increase 해야함.
3) 이런 과정 계속 반복. 이렇게 해서 optimal weight value 얻음.
파이썬 코딩에서, w = w - g
g가 음수면 w 증가, g가 양수면 w 감소.
optimizer = 'adam'이 이 gradient descent method라고 해주는 거임.
근데 일케 되면 local minimum에 빠짐. global minimum으로 가야 하는데....
그래서 adam algorithm은 가속도를 구함.
공 굴려서 저기서 멈춤. 그나마 한계점 보완 가능. 공이 언덕 뛰어넘는거도 해가지구.
metrics = ['accuracy'] 해서 정확도 monitoring 가능.
train the model
batch_size는
16 gradient value 있고, 그거 평균해서 g 구함.
이게 stochastic GD
1 batch가 16data 가짐.
train_data가 455개니까 455/16 = 28.4 몇이니까 28번 해보는거임.
1 epoch : 28 times update
2 epoch : 28*2 times update
20 epoch : 28*20 times update
# Import Library & Load data & Data split
from sklearn.datasets import load_breast_cancer #30 features니까
from sklearn.model_selection import train_test_split
from tensorflow import keras
cancer = load_breast_cancer()
data = cancer['data']
target = cancer['target']
train_data, test_data, train_label, test_label = train_test_split(data, target, test_size = 0.2, random_state = 1)
# Deep Nural Network Model
model = keras.Sequential()
model.add(keras.layers.Dense(20,activation = 'relu', input_shape=(30,))) #input layer에 30node있음.
model.add(keras.layers.Dense(10,activation = 'relu' ))
model.add(keras.layers.Dense(5,activation = 'relu' ))
model.add(keras.layers.Dense(1,activation='sigmoid')) #output layer엔 0(malignant), or 1(B).
#How to train the model?
model.compile(loss='BinaryCrossentropy', optimizer='adam', metrics=['accuracy'])
#Train the model
model.fit(train_data, train_label, batch_size=16, epochs=50)
#Evaluate
(loss,acc) = model.evaluate(test_data, test_label)
Epoch 1/50 29/29 [==============================] - 1s 5ms/step - loss: 49.0800 - accuracy: 0.6264 Epoch 2/50 29/29 [==============================] - 0s 4ms/step - loss: 5.5108 - accuracy: 0.5956 Epoch 3/50 29/29 [==============================] - 0s 4ms/step - loss: 0.8618 - accuracy: 0.3714 Epoch 4/50 29/29 [==============================] - 0s 4ms/step - loss: 0.7345 - accuracy: 0.3033 Epoch 5/50 29/29 [==============================] - 0s 5ms/step - loss: 0.7172 - accuracy: 0.3714 Epoch 6/50 29/29 [==============================] - 0s 4ms/step - loss: 0.6934 - accuracy: 0.3758 Epoch 7/50 29/29 [==============================] - 0s 4ms/step - loss: 0.6777 - accuracy: 0.4484 Epoch 8/50 29/29 [==============================] - 0s 4ms/step - loss: 0.6709 - accuracy: 0.4835 Epoch 9/50 29/29 [==============================] - 0s 4ms/step - loss: 0.6484 - accuracy: 0.5824 Epoch 10/50 29/29 [==============================] - 0s 4ms/step - loss: 0.6355 - accuracy: 0.6198 Epoch 11/50 29/29 [==============================] - 0s 4ms/step - loss: 0.6249 - accuracy: 0.5363 Epoch 12/50 29/29 [==============================] - 0s 5ms/step - loss: 0.6148 - accuracy: 0.7692 Epoch 13/50 29/29 [==============================] - 0s 4ms/step - loss: 0.6038 - accuracy: 0.6549 Epoch 14/50 29/29 [==============================] - 0s 5ms/step - loss: 0.5958 - accuracy: 0.7604 Epoch 15/50 29/29 [==============================] - 0s 4ms/step - loss: 0.5862 - accuracy: 0.8044 Epoch 16/50 29/29 [==============================] - 0s 4ms/step - loss: 0.5772 - accuracy: 0.7363 Epoch 17/50 29/29 [==============================] - 0s 4ms/step - loss: 0.5648 - accuracy: 0.8352 Epoch 18/50 29/29 [==============================] - 0s 4ms/step - loss: 0.5561 - accuracy: 0.8659 Epoch 19/50 29/29 [==============================] - 0s 5ms/step - loss: 0.5531 - accuracy: 0.8505 Epoch 20/50 29/29 [==============================] - 0s 4ms/step - loss: 0.5420 - accuracy: 0.9011 Epoch 21/50 29/29 [==============================] - 0s 4ms/step - loss: 0.5302 - accuracy: 0.8615 Epoch 22/50 29/29 [==============================] - 0s 4ms/step - loss: 0.5195 - accuracy: 0.9099 Epoch 23/50 29/29 [==============================] - 0s 4ms/step - loss: 0.5151 - accuracy: 0.8989 Epoch 24/50 29/29 [==============================] - 0s 4ms/step - loss: 0.5088 - accuracy: 0.8879 Epoch 25/50 29/29 [==============================] - 0s 4ms/step - loss: 0.4980 - accuracy: 0.9077 Epoch 26/50 29/29 [==============================] - 0s 4ms/step - loss: 0.4826 - accuracy: 0.9033 Epoch 27/50 29/29 [==============================] - 0s 4ms/step - loss: 0.4294 - accuracy: 0.9055 Epoch 28/50 29/29 [==============================] - 0s 4ms/step - loss: 0.3385 - accuracy: 0.9055 Epoch 29/50 29/29 [==============================] - 0s 4ms/step - loss: 0.3181 - accuracy: 0.8923 Epoch 30/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2768 - accuracy: 0.9055 Epoch 31/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2868 - accuracy: 0.8923 Epoch 32/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2841 - accuracy: 0.9099 Epoch 33/50 29/29 [==============================] - 0s 5ms/step - loss: 0.2340 - accuracy: 0.9231 Epoch 34/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2221 - accuracy: 0.9275 Epoch 35/50 29/29 [==============================] - 0s 5ms/step - loss: 0.2627 - accuracy: 0.9099 Epoch 36/50 29/29 [==============================] - 0s 5ms/step - loss: 0.2252 - accuracy: 0.9165 Epoch 37/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2010 - accuracy: 0.9187 Epoch 38/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2095 - accuracy: 0.9209 Epoch 39/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2212 - accuracy: 0.9143 Epoch 40/50 29/29 [==============================] - 0s 4ms/step - loss: 0.1974 - accuracy: 0.9165 Epoch 41/50 29/29 [==============================] - 0s 4ms/step - loss: 0.1920 - accuracy: 0.9231 Epoch 42/50 29/29 [==============================] - 0s 5ms/step - loss: 0.2037 - accuracy: 0.9253 Epoch 43/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2098 - accuracy: 0.9231 Epoch 44/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2106 - accuracy: 0.9099 Epoch 45/50 29/29 [==============================] - 0s 4ms/step - loss: 0.1930 - accuracy: 0.9187 Epoch 46/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2136 - accuracy: 0.9033 Epoch 47/50 29/29 [==============================] - 0s 4ms/step - loss: 0.2184 - accuracy: 0.9077 Epoch 48/50 29/29 [==============================] - 0s 5ms/step - loss: 0.1869 - accuracy: 0.9253 Epoch 49/50 29/29 [==============================] - 0s 4ms/step - loss: 0.1835 - accuracy: 0.9187 Epoch 50/50 29/29 [==============================] - 0s 5ms/step - loss: 0.2171 - accuracy: 0.9231 4/4 [==============================] - 0s 5ms/step - loss: 0.2105 - accuracy: 0.9035
model.summary()
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_6 (Dense) (None, 20) 620
dense_7 (Dense) (None, 10) 210
dense_8 (Dense) (None, 5) 55
dense_9 (Dense) (None, 1) 6
=================================================================
Total params: 891
Trainable params: 891
Non-trainable params: 0
_________________________________________________________________
weight는 30x20 + 20x10 + 10x5 + 5x1
bias는 20 + 10 + 5 + 1
다 더해서 891 parameters
input layer shape는 (30,) 로, 1D임.
node에 들어오는 거 계산할 때 w랑 x 곱한거의 summation 뿐만 아니라
그거를 activation funcion 안에 넣어줘야 함. 그래야 output이 됨.
step function, sigmoid function(이건 확률로 계산해줌), relu function(rectified linear unit function) 등.
leaky relu는 x가 음수일 때 기울기 0.1~0.3 쯤인(1보다 작은) 거)
Exponential Relu = E relu
most algorithms는 backend에서 이루어짐. store/process data.
frontend는 for communicate with human. website 등.
keras는 high-level API로, frontend이다. no backend.
convolutional layer, flattening layer, drop out 등의 복잡한 algorithm이 backend에 잇다.
keras는 pug in to backend.
Torch, Theano, Caffe는 backend까지 사람이 해야함(?). core algorithm이 이미 있다. very complicated algorithms.
keras 할 때 torch, theano, caffe 중 선택option 있다.
theano가 backend program으로서 디폴트이다.
keras is super easy. froentend만.
tensorflow(google) 은 frontend, backend 둘 다.
사람들이 keras로 많이 함. 그래서 구글 빡침(?).
구글도 결국 인정.
원래 keras랑 tensorflow랑 완전 independent인데,
tensorflow 안에 library로 keras가 있다.
그래서 from tensorflow import keras 로 씀.
keras도 단독으로 쓸 수 있다.
import keras 로
Tensorflow ( + Keras ) vs. Pytorch
Tensorflow 가 더 easy.
둘은 same result 냄.
pytorch는 every layer마다 그것의 인풋 꺼의 사이즈 정해줘야 함. 좀 더 difficult
matplotlib
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz 11493376/11490434 [==============================] - 0s 0us/step 11501568/11490434 [==============================] - 0s 0us/step
train_images.shape #3Dimension 이다.
(60000, 28, 28)
axis0에 60000개, axis1에 28, axis2에 28개의 data
type(train_images)
numpy.ndarray
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)
(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)
즉 train data는 60000개의 image 있다.
test data엔 10000개의 image 있다.
print(type(train_images), type(train_labels), type(test_images), type(test_labels))
<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>
이미지를 한 번 봐보자.
train_images[0].shape
(28, 28)
plt.imshow(train_images[0])
plt.show() #google colab에선 이거 없어도 됨.
plt.imshow(train_images[1])
<matplotlib.image.AxesImage at 0x7fbfc30d4650>
plt.imshow(train_images[59999])
<matplotlib.image.AxesImage at 0x7fbfc3048850>
28 by 28의 gray scale(흑백) 이미지인데 왜 컬러로 보일까?
RGB 의 3채널 있어야 컬러로 보이는건데?
plt.imshow(train_images[59999], cmap='gray')
<matplotlib.image.AxesImage at 0x7fbfbf1fe810>
plt.imshow(train_images[59999], cmap='gray')
plt.xticks([1,2,3,4,5,6,7])
plt.yticks([]) #하면 축번호 없어짐 (empty list)
plt.title("MNIST Number")
plt.xlabel(f'True value is {train_labels[59999]}')
plt.ylabel('28 x 28 image ')
plt.show()
train_labels[0]
5
train_labels[59999]
8
plt.figure(figsize = (6,6)) # 이거하면 overlap 되는 거 없앨 수 있음.
plt.plot(2,3) #row, column
#for i in range(2,6) : 면 2,3,4,5 까지
for i in range(6) : # 0,1,2,3,4,5 까지
plt.subplot(2,3,i+1)
plt.imshow(train_images[i], cmap='gray')
plt.xticks([])
plt.yticks([])
plt.title("MNIST Number")
plt.xlabel(f'True value is {train_labels[i]}')
plt.ylabel('28 x 28 image ')
plt.show() #이건 맨 마지막에 한번만 써야 함.
Data Normalization 해서 데이터량 줄이자.
각 픽셀이 0~255값이였던 걸
0~1로 줄이자.
#Data Normalization
train_images = train_images / 255.0
test_images = test_images / 255.0
#Modeling
model = keras.Sequential()
#model.add(keras.layers.Dense(10, activation='relu', input_shape=(28, 28)))
#2D를 1D로 바로 하는 건 불가능하므로 flatten 먼저 해주어야 한다. 2D->1D
#28 x 28 = 784개의 node 갖는 것으로.
model.add(keras.layers.Flatten(input_shape = (28, 28)))
#hidden layer
model.add(keras.layers.Dense(128, activation='relu')) #Dense는 fully connected layer
model.add(keras.layers.Dense(64, activation='relu'))
#output layer : one hot encoding : 0~9 중 머가 확률 높은지!
model.add(keras.layers.Dense(10, activation='softmax'))
#sigmoid도 가능 : -무~+무를 확률(-1~+1)로 바꿈.
#softmax는 각 값을 probability로 바꿈, 그리고 다 합치면 summation은 1이겠지
# 그리고 exp(결과) /( summation (exp(결과)) ) 가 그 해당 결과의 probability임.
1) binary classification인데
(1) output이 1 node면 sigmoid 를 activation function으로 씀.
(2) output이 2 node 이상이면 softmax 를 activation function으로 씀.
//둘의 결과는 같음
2) multiple classification이면
softmax 를 activation function으로 씀.
model.summary()
#batch size는 아직 none임.
Model: "sequential_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) (None, 784) 0
dense_10 (Dense) (None, 128) 100480
dense_11 (Dense) (None, 64) 8256
dense_12 (Dense) (None, 10) 650
=================================================================
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________
model.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(train_images, train_labels, batch_size = 128, epochs=10)
#loss 줄어들고 accuracy 늘어난다 점점.
Epoch 1/10 469/469 [==============================] - 2s 4ms/step - loss: 0.3419 - accuracy: 0.9026 Epoch 2/10 469/469 [==============================] - 2s 4ms/step - loss: 0.1395 - accuracy: 0.9588 Epoch 3/10 469/469 [==============================] - 2s 4ms/step - loss: 0.0965 - accuracy: 0.9712 Epoch 4/10 469/469 [==============================] - 2s 4ms/step - loss: 0.0724 - accuracy: 0.9784 Epoch 5/10 469/469 [==============================] - 2s 4ms/step - loss: 0.0592 - accuracy: 0.9819 Epoch 6/10 469/469 [==============================] - 2s 4ms/step - loss: 0.0460 - accuracy: 0.9864 Epoch 7/10 469/469 [==============================] - 2s 4ms/step - loss: 0.0401 - accuracy: 0.9880 Epoch 8/10 469/469 [==============================] - 2s 4ms/step - loss: 0.0318 - accuracy: 0.9902 Epoch 9/10 469/469 [==============================] - 2s 4ms/step - loss: 0.0260 - accuracy: 0.9920 Epoch 10/10 469/469 [==============================] - 2s 4ms/step - loss: 0.0217 - accuracy: 0.9937
<keras.callbacks.History at 0x7fbfbefca510>
loss, acc = model.evaluate(test_images, test_labels)
313/313 [==============================] - 1s 3ms/step - loss: 0.0840 - accuracy: 0.9784
Binary Crossentropy는
Categorical crossentropy는 more than two classes. 3,4,5,6, ....
=> multi classification
만약 output layer이 10node면, 그중 가장 확률 높은걸 결과로 침. softmax 등 써서.
이렇게 하면 loss 값이 0.1549 임.
gradient descnet method로 L을 w1로 편미분해서 learning rate를 곱해서 그것으로부터 빼서 w1을 계속 업데이트.
L(p, True label)은 weight에 대한 함수이다.
맨마지막 output layer꺼를 L 취하는건데 앞에 weights 엄청엄청 많으니까 chain rule 써서 하는 게 있음.
Back propagation 방식임.
Chain rule (연쇄법칙)
a=3
(a == 3)
True
a == 4
False
a != 3
False
a !=4
True
print((a>3), (a>=3), a<3, a<=3)
False True False True
a = 5==5
a
True
'abc' == "abcd"
False
'2' == 2
False
2.0 == 2
True
and, or, not
1<2 and 2<3 # true and true
True
1<2 or 3<2 # true or false
True
not 1<2
False
(1<2) 이런 식으로 괄호 쓰는게 확실함
a=1
b=10
if a<b :
print("a is smaller than b")
elif a==b :
print("a is equal to b")
else :
print("a is not smaller than b")
print("hello")
a is smaller than b hello
a=10
b=10
if a<b :
print("a is smaller than b")
elif a==b :
print("a is equal to b")
else :
print("a is not smaller than b")
print("hello")
a is equal to b hello
a=100
b=10
if a<b :
print("a is smaller than b")
elif a==b :
print("a is equal to b")
else :
print("a is not smaller than b")
print("hello")
a is not smaller than b hello
for item_name (temoprary) in sequence (list, tuple, dictionary, set)
for i in [1,2,3,4,5,6] :
print(i)
1 2 3 4 5 6
for i in ['hello', 'gyurin'] :
print(i)
hello gyurin
for i in ('hello', 'gyurin') :
print(i)
hello gyurin
for i in {'hello', 'gyurin'} :
print(i)
gyurin hello
for i in {'hello':100, 'python':10000, 'good':50}:
print(i)
hello python good
for i in {'hello':100, 'python':10000, 'good':50}.items():
print(i)
('hello', 100)
('python', 10000)
('good', 50)
for i in {'hello':100, 'python':10000, 'good':50}.keys():
print(i)
hello python good
for i in {'hello':100, 'python':10000, 'good':50}.values():
print(i)
100 10000 50
for i in 'hello' :
print(i)
h e l l o
prices = {'apples':0.40, 'orange':0.35, 'banana':0.25}
for key in list( prices.keys() ):
if key == 'orange' :
del prices[key]
print(prices)
{'apples': 0.4, 'banana': 0.25}
prices = {'apples':0.40, 'orange':0.35, 'banana':0.25}
cnt = 0
for value in list( prices.values() ):
if value > 0.35 :
cnt = cnt+1
print(cnt)
1
x = 0
while x<5 :
print(x)
x = x+1
else :
print("while loop is finished")
0 1 2 3 4 while loop is finished
for i in range(4) :
print(i)
0 1 2 3
range(5)
range(0, 5)
list(range(5))
[0, 1, 2, 3, 4]
for i in range(1, 7) :
print(i)
1 2 3 4 5 6
for i in range(1, 10, 2) : #start, finish, interval
print(i)
1 3 5 7 9
for i in enumerate(range(100,110)) : #index도 같이 알려줌.
print(i)
(0, 100) (1, 101) (2, 102) (3, 103) (4, 104) (5, 105) (6, 106) (7, 107) (8, 108) (9, 109)
for i, j in enumerate(range(100,110)) :
print(i, j)
0 100 1 101 2 102 3 103 4 104 5 105 6 106 7 107 8 108 9 109
list1 = [1,2,3,4,5]
list2 = [7,8,9]
str1 = "abcd"
for i,j,k in zip(list1, list2, str1):
print(i,j,k)
1 7 a 2 8 b 3 9 c
# something in a sequence
a=1
b=[1,2,3]
a in b
True
#list comprehension
mystr = "hello"
mylist = []
for i in mystr :
mylist.append(i)
mylist
['h', 'e', 'l', 'l', 'o']
yourstr='hello2'
yourlist = [i for i in yourstr]
yourlist
['h', 'e', 'l', 'l', 'o', '2']
mylist = []
for i in range(10):
if i%2 ==0 :
mylist.append(i)
mylist
[0, 2, 4, 6, 8]
mylist =[i for i in range(10) if i%2 == 0]
mylist
[0, 2, 4, 6, 8]
mylist = [i*10 for i in range(10) if i%5 ==0]
mylist
[0, 50]
5%5
0
#Nested loops with for and/or while
for i in [1,2,3] :
for j in [10,20,30] :
print(i+j)
11 21 31 12 22 32 13 23 33
added_list = [ i+j for i in [1,2,3] for j in [10,20,30] if i+j>20 ]
added_list
[21, 31, 22, 32, 23, 33]
list comprehension 이 너무 복잡해도 오히려 그건 나쁨. 적당~한거에서만 쓰자
break : breaks out of the current closest enclosing loop (finish the loop)
continue : goes to the top of the closest enclosing loop (do not execute only one, and continue)
pass : does nothing
a=[1,2,3,4,5]
for i in a:
if i==3 :
break
print(i)
1 2
i=1
while i <6 :
if i ==3 :
break
print(i)
i= i+1
1 2
a=[1,2,3,4,5]
for i in a:
if i==3 :
continue
print(i)
1 2 4 5
i=0
while i <5 :
i=i+1
if i==3 :
continue
print(i)
1 2 4 5
a=[1,2,3]
for i in a :
pass
#do something
def my_function() :
pass
CNN (convolutional neuron network) also include fully connected layer.
Flatten 말고 Conv2D 쓴다.
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
이렇게 간단히 두 줄 쓰거나 아니면 밑에처럼 구해두 되긴함
train_images.shape
(60000, 28, 28)
흑백이니까 1 channel, 그래서 Conv2D(input_shape = (28, 28. 1))
만약 color image면 RGB의 3 channel이니까 Conv2D(input_shape = (28, 28, 3))
(60000, 28, 28) 를 (60000, 28, 28, 1로 우선 만들어주어야 한다.)
tensorflow/keras는 channel last
pytorch는 channel first
로 코드를 짠다.
type(train_images)
numpy.ndarray
train_images = train_images.reshape(60000, 28, 28, 1)
train_images.shape
(60000, 28, 28, 1)
테스트꺼도 바꿔주자.
test_images = test_images.reshape(10000, 28, 28, 1)
test_images.shape
(10000, 28, 28, 1)
#Modeling
model = keras.Sequential()
model.add(keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation = 'relu', padding='same' ,input_shape = (28, 28, 1)))
model.add(keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2)))
이거는 one filter가 only 9 weights이다.
filter안 0or1은 랜덤임
filter 개수 32니까
(26, 26, 32) 로 output이 filed 될 것이다.
padding='same'이면,
padding은 output을 26by26이 아니라 28 by 28로 만들게 함.
input을 0으로 쫌 더 채워서 30by30로 만들어서!
입력과 출력 사이즈 같게 하려고!
가에 부분은 중요한 정보 없다고 걍 가정하고,
이미지 사이즈 유지하려고!
pool_size는 몇개 박스 중에서 고를건지, stride는 건너뛰기 간격
이건 부분부분에서 main characteristic을 찾기 위해 이걸 하는 거다.
4by4가 2by2로 됨.
4 times faster, complexity down, speed up, find main chracteristic (features) up
model.add(keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu'))
model.add(keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2)))
한번 더 해줌.
근데 만약 tensorflow(keras) 말고 pytorch로 하면 kernel_size를 3, 3, 32 로 써줘야 함.
11x11에서 마지막 row와 column은 계산되지 않는다. 그래서 5x5 됨.
model.add(keras.layers.Flatten())
#fully connected layer 은 end of the layer에 항상 있다.
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
model.summary()
Model: "sequential_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 28, 28, 32) 320
max_pooling2d (MaxPooling2D (None, 14, 14, 32) 0
)
conv2d_1 (Conv2D) (None, 14, 14, 64) 18496
max_pooling2d_1 (MaxPooling (None, 7, 7, 64) 0
2D)
flatten_1 (Flatten) (None, 3136) 0
dense_13 (Dense) (None, 128) 401536
dense_14 (Dense) (None, 64) 8256
dense_15 (Dense) (None, 10) 650
=================================================================
Total params: 429,258
Trainable params: 429,258
Non-trainable params: 0
_________________________________________________________________
model.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(train_images, train_labels, batch_size = 128, epochs=10)
#loss 줄어들고 accuracy 늘어난다 점점.
Epoch 1/10 469/469 [==============================] - 12s 10ms/step - loss: 0.2169 - accuracy: 0.9352 Epoch 2/10 469/469 [==============================] - 4s 9ms/step - loss: 0.0580 - accuracy: 0.9822 Epoch 3/10 469/469 [==============================] - 4s 9ms/step - loss: 0.0381 - accuracy: 0.9881 Epoch 4/10 469/469 [==============================] - 4s 9ms/step - loss: 0.0299 - accuracy: 0.9907 Epoch 5/10 469/469 [==============================] - 4s 9ms/step - loss: 0.0235 - accuracy: 0.9925 Epoch 6/10 469/469 [==============================] - 4s 9ms/step - loss: 0.0178 - accuracy: 0.9943 Epoch 7/10 469/469 [==============================] - 4s 9ms/step - loss: 0.0160 - accuracy: 0.9948 Epoch 8/10 469/469 [==============================] - 4s 9ms/step - loss: 0.0126 - accuracy: 0.9958 Epoch 9/10 469/469 [==============================] - 4s 9ms/step - loss: 0.0090 - accuracy: 0.9972 Epoch 10/10 469/469 [==============================] - 4s 9ms/step - loss: 0.0104 - accuracy: 0.9964
<keras.callbacks.History at 0x7fbfbee84310>
loss, acc = model.evaluate(test_images, test_labels)
313/313 [==============================] - 2s 4ms/step - loss: 0.0283 - accuracy: 0.9918
fully connected layer는 inefficient하다.
a11과 a1 20000가 관계 없어서.
7by7 이미지, 3channel인 거에다가
one filter 를 곱한다.
filter의 각 값들은 weight이다.
randomly assigned number이다.
0x3 + 1x1 + 0x4 + 1x4 + 1x0 + 1x1 + 0x2 + ... = 3이다. (27 terms 더한거!)
이렇게 해서 1 얻음.
계속계속 해서 5by5 image 얻게 됨.
image가 smaller 되었다.
필터 사이즈 3by3은 내가 정하는 것! (5,5), (7,7) 등 보통은 odd number로 한다.
이 경우, filter size == kernel_size == (3,3) 이다.
이미지가 3channel이므로 filter도 자동적으로 3channel이다.
우리가 코딩에서 (3,3)만 정해주면 tensorflow가 automatically (3,3,3)로 맞춰줌.
이 필터는 3channel이니까 weight가 총 27개인 셈이다.
output size는 5 by 5 이다. (5,5,1)
Conv2D(filters=64, kernel_size=(3,3), ....) 에서
3x3x3의 one filter가 총 64개 있단 말임.
one filter는 27 weights 있고, (5,5,1) 얻으므로
총 64개의 (5,5,1)사이즈의 output 얻음.
즉, (5,5,64)이고, 이거를 activation map이라 함.
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images = train_images.reshape(60000, 28, 28, 1)
test_images = test_images.reshape(10000, 28, 28, 1)
#Modeling
model = keras.Sequential()
model.add(keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation = 'relu', padding='same' ,input_shape = (28, 28, 1)))
model.add(keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu'))
model.add(keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(keras.layers.Flatten())
#fully connected layer 은 end of the layer에 항상 있다.
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
print(model.summary())
model.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(train_images, train_labels, batch_size = 128, epochs=10)
#loss 줄어들고 accuracy 늘어난다 점점.
#epochs 늘리면 overfiting 될 수도.
loss, acc = model.evaluate(test_images, test_labels)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 28, 28, 32) 320
max_pooling2d (MaxPooling2D (None, 14, 14, 32) 0
)
conv2d_1 (Conv2D) (None, 14, 14, 64) 18496
max_pooling2d_1 (MaxPooling (None, 7, 7, 64) 0
2D)
flatten (Flatten) (None, 3136) 0
dense (Dense) (None, 128) 401536
dense_1 (Dense) (None, 64) 8256
dense_2 (Dense) (None, 10) 650
=================================================================
Total params: 429,258
Trainable params: 429,258
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
469/469 [==============================] - 6s 9ms/step - loss: 0.2094 - accuracy: 0.9364
Epoch 2/10
469/469 [==============================] - 4s 9ms/step - loss: 0.0535 - accuracy: 0.9836
Epoch 3/10
469/469 [==============================] - 4s 9ms/step - loss: 0.0363 - accuracy: 0.9884
Epoch 4/10
469/469 [==============================] - 4s 9ms/step - loss: 0.0265 - accuracy: 0.9917
Epoch 5/10
469/469 [==============================] - 4s 9ms/step - loss: 0.0209 - accuracy: 0.9936
Epoch 6/10
469/469 [==============================] - 4s 9ms/step - loss: 0.0166 - accuracy: 0.9945
Epoch 7/10
469/469 [==============================] - 4s 9ms/step - loss: 0.0134 - accuracy: 0.9956
Epoch 8/10
469/469 [==============================] - 4s 9ms/step - loss: 0.0118 - accuracy: 0.9963
Epoch 9/10
469/469 [==============================] - 4s 9ms/step - loss: 0.0101 - accuracy: 0.9967
Epoch 10/10
469/469 [==============================] - 4s 9ms/step - loss: 0.0089 - accuracy: 0.9969
313/313 [==============================] - 1s 4ms/step - loss: 0.0378 - accuracy: 0.9898
데이터가 이런 식으로 있으면
이렇게 하고 싶은데, 너무 많이 train하면
이렇게 된다. 이 모델은 이 training data에 대해서만 적용되는거다...
train data가 million, multi million이라고 할 지라도 real data에 비해선 아주 일부이다.
그래서 generalize the model 하는 게 중요하다.
그 모델이 whole data를 다 커버하도록 만들어지는 게 중요하다.
model.fit(train_images, train_labels, batch_size = 128, epochs=50)
epochs 늘리면 overfiting 될 수도.
loss를 0퍼, accuracy를 100퍼로 맞춘다 해도, 이건 training data 에 한정해서만 이니까,
test data에서는 오히려 아닌거임.
즉,
loss, acc = model.evaluate(test_images, test_labels)
이거 실행해보면 아니라는 거지.
epoch을 적당~히 해야한다는거임.
best epoch는 멀까?
epoch을 3이라고 정해서 model 정해도,
test data가 그 training에 적용이 되어 있는 것임.
test data는 model selection 시 totally isolated 되어야 한다.
이렇게 train data를
real train data / validation data로 나눠야 한다.
보통 8:2, 7:3, 9:1 이렇게 함.
validation data의 목적은 choose the best model
test data의 목적은 evaluation 목적
model.fit(train_images, train_labels, batch_size = 128, validation_split=0.2, epochs=20)
이렇게 하면 train data를
real train data가 뒤쪽 80퍼
validation data가 맨 앞에있는 20퍼로 되는 거임.
만약 train data가 randomly splited 되어 있지 않으면 이거 한 번 섞어주고 real train과 validation으로 나눠줘야 됨.
model.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), epochs=20)
from sklearn.model_selection import train_test_split
해주고
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, random_state = 100 )
해야 함.
이런식으로 되게 밸런스 맞춰주기!
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 100 )
stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
history = model.fit(train_images, train_labels, batch_size = 128, validation_split=0.2, epochs=20)
이 process의 result를 history에 일단 저장한다.
이건 training loss를 minimize하도록만 진행된다.
validation data는 not involved in the training 되고 있따.
validation loss는 줄고 있다가 언젠가부터 안줄어.
이렇게 쭉 20개 있음(epoch 20으로 햇으니까!)
노란색 validation loss 가 줄다가 점점 커짐.
overfitting 되었다는 뜻임.
교차점 or 노란색이 minimum일 때 의 epoch인 7일 때가 제일 베스트이다.
val_loss 가 제일 작은 게
best_model.h5에 저장되어잇음.
근데 만약 epochs=10000 이렇게 설정하면, 너무너무 많이 기달려 야하니까
근데 올라갓다 내려갓다 그러니까
patience = 5 이것두 넣어줘야 함.
5번이나 기다렸는데 그 5번전꺼보다 높으면 stop해라.
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 100 )
#stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1) #이 경우 train_images.shape[0]는 48000
test_images = test_images.reshape(10000, 28, 28, 1)
#Modeling
model = keras.Sequential()
model.add(keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation = 'relu', padding='same' ,input_shape = (28, 28, 1)))
model.add(keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu'))
model.add(keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
print(model.summary())
model.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
path = 'best_model.h5'
modelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)
#earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
#model.fit(train_images, train_labels, batch_size = 128, validation_split=0.2, epochs=20)
#history = model.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), epochs=10)
history = model.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), epochs=20)
#loss 줄어들고 accuracy 늘어난다 점점.
#epochs 늘리면 overfiting 될 수도.
#real loss를 줄이는 게 목표. val_loss가 아니라!
model.load_weights('best_model.h5')
loss, acc = model.evaluate(test_images, test_labels)
plt.plot(range(1,21), history.history['loss'], label='Train loss')
plt.plot(range(1,21), history.history['val_loss'], label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
Model: "sequential_11"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_22 (Conv2D) (None, 28, 28, 32) 320
max_pooling2d_22 (MaxPoolin (None, 14, 14, 32) 0
g2D)
conv2d_23 (Conv2D) (None, 14, 14, 64) 18496
max_pooling2d_23 (MaxPoolin (None, 7, 7, 64) 0
g2D)
flatten_11 (Flatten) (None, 3136) 0
dense_33 (Dense) (None, 128) 401536
dense_34 (Dense) (None, 64) 8256
dense_35 (Dense) (None, 10) 650
=================================================================
Total params: 429,258
Trainable params: 429,258
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/20
375/375 [==============================] - 5s 11ms/step - loss: 0.2435 - accuracy: 0.9246 - val_loss: 0.0897 - val_accuracy: 0.9727
Epoch 2/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0625 - accuracy: 0.9803 - val_loss: 0.0525 - val_accuracy: 0.9827
Epoch 3/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0422 - accuracy: 0.9868 - val_loss: 0.0458 - val_accuracy: 0.9854
Epoch 4/20
375/375 [==============================] - 4s 9ms/step - loss: 0.0304 - accuracy: 0.9909 - val_loss: 0.0361 - val_accuracy: 0.9893
Epoch 5/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0224 - accuracy: 0.9932 - val_loss: 0.0337 - val_accuracy: 0.9889
Epoch 6/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0193 - accuracy: 0.9941 - val_loss: 0.0468 - val_accuracy: 0.9853
Epoch 7/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0160 - accuracy: 0.9945 - val_loss: 0.0331 - val_accuracy: 0.9902
Epoch 8/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0107 - accuracy: 0.9966 - val_loss: 0.0382 - val_accuracy: 0.9902
Epoch 9/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0141 - accuracy: 0.9951 - val_loss: 0.0360 - val_accuracy: 0.9905
Epoch 10/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0082 - accuracy: 0.9971 - val_loss: 0.0420 - val_accuracy: 0.9896
Epoch 11/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0077 - accuracy: 0.9974 - val_loss: 0.0408 - val_accuracy: 0.9898
Epoch 12/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0096 - accuracy: 0.9966 - val_loss: 0.0401 - val_accuracy: 0.9887
Epoch 13/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0069 - accuracy: 0.9976 - val_loss: 0.0339 - val_accuracy: 0.9918
Epoch 14/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0029 - accuracy: 0.9991 - val_loss: 0.0405 - val_accuracy: 0.9903
Epoch 15/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0065 - accuracy: 0.9975 - val_loss: 0.0409 - val_accuracy: 0.9912
Epoch 16/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0056 - accuracy: 0.9981 - val_loss: 0.0553 - val_accuracy: 0.9869
Epoch 17/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0061 - accuracy: 0.9980 - val_loss: 0.0513 - val_accuracy: 0.9880
Epoch 18/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0051 - accuracy: 0.9984 - val_loss: 0.0532 - val_accuracy: 0.9899
Epoch 19/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0045 - accuracy: 0.9986 - val_loss: 0.0457 - val_accuracy: 0.9897
Epoch 20/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0049 - accuracy: 0.9985 - val_loss: 0.0481 - val_accuracy: 0.9893
313/313 [==============================] - 2s 4ms/step - loss: 0.0338 - accuracy: 0.9889
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 100 )
#stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1) #이 경우 train_images.shape[0]는 48000
test_images = test_images.reshape(10000, 28, 28, 1)
#Modeling
model = keras.Sequential()
model.add(keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation = 'relu', padding='same' ,input_shape = (28, 28, 1)))
model.add(keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu'))
model.add(keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
print(model.summary())
model.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
path = 'best_model.h5'
modelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)
earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
#model.fit(train_images, train_labels, batch_size = 128, validation_split=0.2, epochs=20)
#history = model.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), epochs=10)
history = model.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), callbacks=[modelCheckpoint, earlyStopping], epochs=20)
#loss 줄어들고 accuracy 늘어난다 점점.
#epochs 늘리면 overfiting 될 수도.
#real loss를 줄이는 게 목표. val_loss가 아니라!
model.load_weights('best_model.h5')
loss, acc = model.evaluate(test_images, test_labels)
plt.plot(range(1,21), history.history['loss'], label='Train loss')
plt.plot(range(1,21), history.history['val_loss'], label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
Model: "sequential_12"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_24 (Conv2D) (None, 28, 28, 32) 320
max_pooling2d_24 (MaxPoolin (None, 14, 14, 32) 0
g2D)
conv2d_25 (Conv2D) (None, 14, 14, 64) 18496
max_pooling2d_25 (MaxPoolin (None, 7, 7, 64) 0
g2D)
flatten_12 (Flatten) (None, 3136) 0
dense_36 (Dense) (None, 128) 401536
dense_37 (Dense) (None, 64) 8256
dense_38 (Dense) (None, 10) 650
=================================================================
Total params: 429,258
Trainable params: 429,258
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/20
375/375 [==============================] - 5s 11ms/step - loss: 0.2459 - accuracy: 0.9263 - val_loss: 0.0762 - val_accuracy: 0.9760
Epoch 2/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0632 - accuracy: 0.9803 - val_loss: 0.0508 - val_accuracy: 0.9838
Epoch 3/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0426 - accuracy: 0.9864 - val_loss: 0.0473 - val_accuracy: 0.9851
Epoch 4/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0322 - accuracy: 0.9898 - val_loss: 0.0424 - val_accuracy: 0.9868
Epoch 5/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0270 - accuracy: 0.9911 - val_loss: 0.0361 - val_accuracy: 0.9892
Epoch 6/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0205 - accuracy: 0.9933 - val_loss: 0.0371 - val_accuracy: 0.9878
Epoch 7/20
375/375 [==============================] - 4s 9ms/step - loss: 0.0152 - accuracy: 0.9953 - val_loss: 0.0383 - val_accuracy: 0.9887
Epoch 8/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0146 - accuracy: 0.9951 - val_loss: 0.0363 - val_accuracy: 0.9892
Epoch 9/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0112 - accuracy: 0.9961 - val_loss: 0.0338 - val_accuracy: 0.9918
Epoch 10/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0104 - accuracy: 0.9968 - val_loss: 0.0342 - val_accuracy: 0.9901
Epoch 11/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0070 - accuracy: 0.9977 - val_loss: 0.0450 - val_accuracy: 0.9876
Epoch 12/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0088 - accuracy: 0.9969 - val_loss: 0.0423 - val_accuracy: 0.9903
Epoch 13/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0057 - accuracy: 0.9980 - val_loss: 0.0467 - val_accuracy: 0.9886
Epoch 14/20
375/375 [==============================] - 4s 9ms/step - loss: 0.0081 - accuracy: 0.9971 - val_loss: 0.0454 - val_accuracy: 0.9883
313/313 [==============================] - 2s 5ms/step - loss: 0.0308 - accuracy: 0.9911
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-21-ebf762ad33b4> in <module>() 46 loss, acc = model.evaluate(test_images, test_labels) 47 ---> 48 plt.plot(range(1,21), history.history['loss'], label='Train loss') 49 plt.plot(range(1,21), history.history['val_loss'], label='Validation loss') 50 plt.xlabel('Epochs') /usr/local/lib/python3.7/dist-packages/matplotlib/pyplot.py in plot(scalex, scaley, data, *args, **kwargs) 2761 return gca().plot( 2762 *args, scalex=scalex, scaley=scaley, **({"data": data} if data -> 2763 is not None else {}), **kwargs) 2764 2765 /usr/local/lib/python3.7/dist-packages/matplotlib/axes/_axes.py in plot(self, scalex, scaley, data, *args, **kwargs) 1645 """ 1646 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D) -> 1647 lines = [*self._get_lines(*args, data=data, **kwargs)] 1648 for line in lines: 1649 self.add_line(line) /usr/local/lib/python3.7/dist-packages/matplotlib/axes/_base.py in __call__(self, *args, **kwargs) 214 this += args[0], 215 args = args[1:] --> 216 yield from self._plot_args(this, kwargs) 217 218 def get_next_color(self): /usr/local/lib/python3.7/dist-packages/matplotlib/axes/_base.py in _plot_args(self, tup, kwargs) 340 341 if x.shape[0] != y.shape[0]: --> 342 raise ValueError(f"x and y must have same first dimension, but " 343 f"have shapes {x.shape} and {y.shape}") 344 if x.ndim > 2 or y.ndim > 2: ValueError: x and y must have same first dimension, but have shapes (20,) and (14,)
왜 accuracy 말고 loss로 할까?
여기서 accuracy는 100퍼센트지만
그치만 그닥 안좋아서 더 트레이닝해야된다.
이건 accuracy가 75퍼지만, 더 좋은 모델이다.
이 때문에 monitor를 val_accuracy 말고 val_loss를 쓴다.
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 100 )
#stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1) #이 경우 train_images.shape[0]는 48000
test_images = test_images.reshape(10000, 28, 28, 1)
#Modeling
input = keras.layers.Input(shape=(28,28,1), name='Input_layer')
x = keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', input_shape=(28,28,1), name='1stCV')(input)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='1stMP')(x)
x = keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', name='2ndCV')(x)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Flatten(name='Flatten')(x)
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
model1 = keras.models.Model(inputs=input, outputs=output)
print(model1.summary())
model1.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
path = 'best_model1.h5'
modelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)
#earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
history = model1.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), epochs=20)
model1.load_weights('best_model.h5')
loss, acc = model1.evaluate(test_images, test_labels)
plt.plot(range(1,21), history.history['loss'], label='Train loss')
plt.plot(range(1,21), history.history['val_loss'], label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Input_layer (InputLayer) [(None, 28, 28, 1)] 0
1stCV (Conv2D) (None, 28, 28, 32) 320
1stMP (MaxPooling2D) (None, 14, 14, 32) 0
2ndCV (Conv2D) (None, 14, 14, 64) 18496
2ndMP (MaxPooling2D) (None, 7, 7, 64) 0
Flatten (Flatten) (None, 3136) 0
1stFC (Dense) (None, 128) 401536
2ndFC (Dense) (None, 64) 8256
lastFC (Dense) (None, 10) 650
=================================================================
Total params: 429,258
Trainable params: 429,258
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/20
375/375 [==============================] - 5s 11ms/step - loss: 0.2435 - accuracy: 0.9254 - val_loss: 0.0773 - val_accuracy: 0.9759
Epoch 2/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0658 - accuracy: 0.9796 - val_loss: 0.0567 - val_accuracy: 0.9815
Epoch 3/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0446 - accuracy: 0.9867 - val_loss: 0.0450 - val_accuracy: 0.9873
Epoch 4/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0344 - accuracy: 0.9889 - val_loss: 0.0395 - val_accuracy: 0.9888
Epoch 5/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0246 - accuracy: 0.9923 - val_loss: 0.0447 - val_accuracy: 0.9864
Epoch 6/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0205 - accuracy: 0.9933 - val_loss: 0.0425 - val_accuracy: 0.9867
Epoch 7/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0169 - accuracy: 0.9943 - val_loss: 0.0337 - val_accuracy: 0.9898
Epoch 8/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0130 - accuracy: 0.9955 - val_loss: 0.0347 - val_accuracy: 0.9905
Epoch 9/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0120 - accuracy: 0.9961 - val_loss: 0.0353 - val_accuracy: 0.9893
Epoch 10/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0091 - accuracy: 0.9973 - val_loss: 0.0467 - val_accuracy: 0.9866
Epoch 11/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0079 - accuracy: 0.9971 - val_loss: 0.0488 - val_accuracy: 0.9870
Epoch 12/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0077 - accuracy: 0.9973 - val_loss: 0.0399 - val_accuracy: 0.9907
Epoch 13/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0093 - accuracy: 0.9968 - val_loss: 0.0448 - val_accuracy: 0.9869
Epoch 14/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0063 - accuracy: 0.9980 - val_loss: 0.0378 - val_accuracy: 0.9904
Epoch 15/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0044 - accuracy: 0.9984 - val_loss: 0.0455 - val_accuracy: 0.9882
Epoch 16/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0032 - accuracy: 0.9990 - val_loss: 0.0711 - val_accuracy: 0.9866
Epoch 17/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0048 - accuracy: 0.9984 - val_loss: 0.0500 - val_accuracy: 0.9892
Epoch 18/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0069 - accuracy: 0.9979 - val_loss: 0.0355 - val_accuracy: 0.9915
Epoch 19/20
375/375 [==============================] - 4s 9ms/step - loss: 0.0031 - accuracy: 0.9991 - val_loss: 0.0504 - val_accuracy: 0.9887
Epoch 20/20
375/375 [==============================] - 4s 10ms/step - loss: 0.0037 - accuracy: 0.9990 - val_loss: 0.0443 - val_accuracy: 0.9897
313/313 [==============================] - 1s 4ms/step - loss: 0.0308 - accuracy: 0.9911
x = keras.layers.Conv2D(filters = 32, kernel_size = (3,3), activation = 'relu', padding = 'same', input_shape = (28, 28, 1), name = '1stCV')(input)
x = keras.layers.MaxPool2D(pool_size = (2,2), strides=(2,2), name='1stMP')(x)
model1.get_layer('2ndCV')
<keras.layers.convolutional.Conv2D at 0x7fb9d778e290>
model1.get_layer('2ndCV').output #None은 batch size를 의미, 14 by 14 by 64가 확인됨.
<KerasTensor: shape=(None, 14, 14, 64) dtype=float32 (created by layer '2ndCV')>
second_cv = model1.get_layer('2ndCV').output
model2 = keras.models.Model(inputs = input, outputs = second_cv)
model2. summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Input_layer (InputLayer) [(None, 28, 28, 1)] 0
1stCV (Conv2D) (None, 28, 28, 32) 320
1stMP (MaxPooling2D) (None, 14, 14, 32) 0
2ndCV (Conv2D) (None, 14, 14, 64) 18496
=================================================================
Total params: 18,816
Trainable params: 18,816
Non-trainable params: 0
_________________________________________________________________
first_cv = model1.get_layer('1stCV').output
second_cv = model1.get_layer('2ndCV').output
model3 = keras.models.Model(inputs = input, outputs = [first_cv, second_cv] )
model3. summary()
#일케하면 multi output (이경우 2output) 임.
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Input_layer (InputLayer) [(None, 28, 28, 1)] 0
1stCV (Conv2D) (None, 28, 28, 32) 320
1stMP (MaxPooling2D) (None, 14, 14, 32) 0
2ndCV (Conv2D) (None, 14, 14, 64) 18496
=================================================================
Total params: 18,816
Trainable params: 18,816
Non-trainable params: 0
_________________________________________________________________
test_images.shape
(10000, 28, 28, 1)
input_image = test_images[0]
#첫번째 이미지를 함 확인해보자.
input_image.shape
(28, 28, 1)
#plt.imshow(input_image, cmap='gray')
#plt.show()
#이건 (28,28,1)라서 오류난다.
plt.imshow(input_image.reshape(28,28), cmap='gray')
plt.show()
input_image = input_image.reshape(1,28, 28,1)
conv_outputs = model2(input_image)
#4D로 만들어줘야 한다. batch 자리도 만들어줘야 함. (1, 28, 28, 1)의 사이즈로 만들어야 함.
type(conv_outputs)
#EagerTensor
#tensorflow는 main engiene. backend.
#keras는 front end, human과 연결하기 위함.
#model로 들어가는 data type이 numpy, list, tuple, tensor 등 다양함.
#tensorflow는 all data가 type of tensor이다. output도 tensor이다.
#이경우 tensorflow가 eagertensor로 output한 것임. (업데이트 된거임)
tensorflow.python.framework.ops.EagerTensor
conv_outputs = conv_outputs.numpy()
type(conv_outputs)
numpy.ndarray
conv_outputs.shape
(1, 14, 14, 64)
conv_outputs = conv_outputs.reshape(14,14,64)
plt.imshow(conv_outputs[:,:,0], cmap='gray' )
plt.show()
#이게 2nd Convolution의 activation map이다.
plt.figure(figsize=(12,12))
plt.subplot(8,8,1)
plt.imshow(conv_outputs[:,:,0], cmap='gray' )
plt.subplot(8,8,2)
plt.imshow(conv_outputs[:,:,1], cmap='gray' )
plt.subplot(8,8,3)
plt.imshow(conv_outputs[:,:,2], cmap='gray' )
plt.show()
plt.figure(figsize=(12,12))
for i in range(64) :
plt.subplot(8,8,i+1)
plt.imshow(conv_outputs[:,:,i], cmap='gray' )
plt.xticks([])
plt.yticks([])
plt.show()
#이거 64개가 각각 64개의 다른 필터 통과하고 난 이후인거임.
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
#(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
fashion = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion.load_data()
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 100 )
#stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1) #이 경우 train_images.shape[0]는 48000
test_images = test_images.reshape(10000, 28, 28, 1)
#Modeling
input = keras.layers.Input(shape=(28,28,1), name='Input_layer')
x = keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', input_shape=(28,28,1), name='1stCV')(input)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='1stMP')(x)
x = keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', name='2ndCV')(x)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Flatten(name='Flatten')(x)
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
model1 = keras.models.Model(inputs=input, outputs=output)
print(model1.summary())
first_cv = model1.get_layer('1stCV').output
second_cv = model1.get_layer('2ndCV').output
model2 = keras.models.Model(inputs = input, outputs = second_cv)
print(model2. summary())
model3 = keras.models.Model(inputs = input, outputs = [first_cv, second_cv] ) #일케하면 multi output (이경우 2output) 임.
print(model3. summary())
input_image = test_images[0]
input_image = input_image.reshape(1,28, 28,1)
conv_outputs = model2(input_image)
#4D로 만들어줘야 한다. batch 자리도 만들어줘야 함. (1, 28, 28, 1)의 사이즈로 만들어야 함.
conv_outputs = conv_outputs.numpy()
conv_outputs = conv_outputs.reshape(14,14,64)
plt.figure(figsize=(12,12))
for i in range(64) :
plt.subplot(8,8,i+1)
plt.imshow(conv_outputs[:,:,i], cmap='gray' )
plt.xticks([])
plt.yticks([])
plt.show()
'''
model1.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
path = 'best_model1.h5'
modelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)
#earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
history = model1.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), epochs=20)
model1.load_weights('best_model.h5')
loss, acc = model1.evaluate(test_images, test_labels)
plt.plot(range(1,21), history.history['loss'], label='Train loss')
plt.plot(range(1,21), history.history['val_loss'], label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
'''
Model: "model_6"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Input_layer (InputLayer) [(None, 28, 28, 1)] 0
1stCV (Conv2D) (None, 28, 28, 32) 320
1stMP (MaxPooling2D) (None, 14, 14, 32) 0
2ndCV (Conv2D) (None, 14, 14, 64) 18496
2ndMP (MaxPooling2D) (None, 7, 7, 64) 0
Flatten (Flatten) (None, 3136) 0
1stFC (Dense) (None, 128) 401536
2ndFC (Dense) (None, 64) 8256
lastFC (Dense) (None, 10) 650
=================================================================
Total params: 429,258
Trainable params: 429,258
Non-trainable params: 0
_________________________________________________________________
None
Model: "model_7"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Input_layer (InputLayer) [(None, 28, 28, 1)] 0
1stCV (Conv2D) (None, 28, 28, 32) 320
1stMP (MaxPooling2D) (None, 14, 14, 32) 0
2ndCV (Conv2D) (None, 14, 14, 64) 18496
=================================================================
Total params: 18,816
Trainable params: 18,816
Non-trainable params: 0
_________________________________________________________________
None
Model: "model_8"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Input_layer (InputLayer) [(None, 28, 28, 1)] 0
1stCV (Conv2D) (None, 28, 28, 32) 320
1stMP (MaxPooling2D) (None, 14, 14, 32) 0
2ndCV (Conv2D) (None, 14, 14, 64) 18496
=================================================================
Total params: 18,816
Trainable params: 18,816
Non-trainable params: 0
_________________________________________________________________
None
"\nmodel1.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])\n\npath = 'best_model1.h5'\nmodelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)\n#earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)\n\nhistory = model1.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), epochs=20)\n\nmodel1.load_weights('best_model.h5')\nloss, acc = model1.evaluate(test_images, test_labels)\n\nplt.plot(range(1,21), history.history['loss'], label='Train loss')\nplt.plot(range(1,21), history.history['val_loss'], label='Validation loss')\nplt.xlabel('Epochs')\nplt.ylabel('Loss values')\nplt.legend()\nplt.show()\n\n"
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
#(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
fashion = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion.load_data()
i=0
plt.imshow(train_images[i], cmap='gray')
plt.xticks([])
plt.yticks([])
plt.xlabel(f'This is {train_labels[i]}')
plt.ylabel('28x28 image')
plt.title('This is MNIST image')
plt.show()
plt.figure(figsize=(15,15))
plt.plot(5,5)
for i in range(25) :
plt.subplot(5,5,i+1)
plt.imshow(train_images[i], cmap='gray')
plt.xticks([])
plt.yticks([])
plt.xlabel(f'This is {train_labels[i]}')
plt.ylabel('28x28 image')
plt.title('This is MNIST image')
plt.show()
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
#(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
fashion = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion.load_data()
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 100 )
#stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1) #이 경우 train_images.shape[0]는 48000
test_images = test_images.reshape(test_images.shape[0], 28, 28, 1)
#Modeling
input = keras.layers.Input(shape=(28,28,1), name='Input_layer')
x = keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', input_shape=(28,28,1), name='1stCV')(input)
x1 = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='1stMP')(x)
x = keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', name='2ndCV')(x1)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Flatten(name='Flatten')(x)
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
model1 = keras.models.Model(inputs=input, outputs=output)
print(model1.summary())
'''
first_cv = model1.get_layer('1stCV').output
second_cv = model1.get_layer('2ndCV').output
model2 = keras.models.Model(inputs = input, outputs = second_cv)
print(model2. summary())
model3 = keras.models.Model(inputs = input, outputs = [first_cv, second_cv] ) #일케하면 multi output (이경우 2output) 임.
print(model3. summary())
input_image = test_images[0]
input_image = input_image.reshape(1,28, 28,1)
conv_outputs = model2(input_image)
#4D로 만들어줘야 한다. batch 자리도 만들어줘야 함. (1, 28, 28, 1)의 사이즈로 만들어야 함.
conv_outputs = conv_outputs.numpy()
conv_outputs = conv_outputs.reshape(14,14,64)
plt.figure(figsize=(12,12))
for i in range(64) :
plt.subplot(8,8,i+1)
plt.imshow(conv_outputs[:,:,i], cmap='gray' )
plt.xticks([])
plt.yticks([])
plt.show()
'''
model1.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
#'adam' or gradient descent method
path = 'best_model1.h5'
modelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)
earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
history = model1.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), callbacks=[modelCheckpoint, earlyStopping], epochs=1000)
model1.load_weights('best_model1.h5')
loss, acc = model1.evaluate(test_images, test_labels)
plt.plot(history.history['loss'], label='Train loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
plt.plot(history.history['accuracy'], label='Train accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
Model: "model_11"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Input_layer (InputLayer) [(None, 28, 28, 1)] 0
1stCV (Conv2D) (None, 28, 28, 32) 320
1stMP (MaxPooling2D) (None, 14, 14, 32) 0
2ndCV (Conv2D) (None, 14, 14, 64) 18496
2ndMP (MaxPooling2D) (None, 7, 7, 64) 0
Flatten (Flatten) (None, 3136) 0
1stFC (Dense) (None, 128) 401536
2ndFC (Dense) (None, 64) 8256
lastFC (Dense) (None, 10) 650
=================================================================
Total params: 429,258
Trainable params: 429,258
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/1000
375/375 [==============================] - 5s 13ms/step - loss: 0.5413 - accuracy: 0.8017 - val_loss: 0.3663 - val_accuracy: 0.8705
Epoch 2/1000
375/375 [==============================] - 5s 13ms/step - loss: 0.3355 - accuracy: 0.8813 - val_loss: 0.2976 - val_accuracy: 0.8931
Epoch 3/1000
375/375 [==============================] - 5s 13ms/step - loss: 0.2824 - accuracy: 0.8970 - val_loss: 0.2715 - val_accuracy: 0.9041
Epoch 4/1000
375/375 [==============================] - 5s 13ms/step - loss: 0.2479 - accuracy: 0.9100 - val_loss: 0.2622 - val_accuracy: 0.9050
Epoch 5/1000
375/375 [==============================] - 5s 12ms/step - loss: 0.2245 - accuracy: 0.9165 - val_loss: 0.2373 - val_accuracy: 0.9137
Epoch 6/1000
375/375 [==============================] - 5s 12ms/step - loss: 0.2038 - accuracy: 0.9250 - val_loss: 0.2231 - val_accuracy: 0.9184
Epoch 7/1000
375/375 [==============================] - 5s 13ms/step - loss: 0.1839 - accuracy: 0.9315 - val_loss: 0.2227 - val_accuracy: 0.9172
Epoch 8/1000
375/375 [==============================] - 5s 13ms/step - loss: 0.1655 - accuracy: 0.9394 - val_loss: 0.2223 - val_accuracy: 0.9204
Epoch 9/1000
375/375 [==============================] - 5s 12ms/step - loss: 0.1517 - accuracy: 0.9437 - val_loss: 0.2430 - val_accuracy: 0.9111
Epoch 10/1000
375/375 [==============================] - 5s 13ms/step - loss: 0.1380 - accuracy: 0.9483 - val_loss: 0.2212 - val_accuracy: 0.9228
Epoch 11/1000
375/375 [==============================] - 5s 13ms/step - loss: 0.1216 - accuracy: 0.9551 - val_loss: 0.2194 - val_accuracy: 0.9248
Epoch 12/1000
375/375 [==============================] - 5s 13ms/step - loss: 0.1066 - accuracy: 0.9611 - val_loss: 0.2310 - val_accuracy: 0.9238
Epoch 13/1000
375/375 [==============================] - 5s 13ms/step - loss: 0.0940 - accuracy: 0.9656 - val_loss: 0.2540 - val_accuracy: 0.9153
Epoch 14/1000
375/375 [==============================] - 5s 12ms/step - loss: 0.0866 - accuracy: 0.9679 - val_loss: 0.2484 - val_accuracy: 0.9212
Epoch 15/1000
375/375 [==============================] - 5s 12ms/step - loss: 0.0709 - accuracy: 0.9738 - val_loss: 0.2825 - val_accuracy: 0.9164
Epoch 16/1000
375/375 [==============================] - 5s 12ms/step - loss: 0.0642 - accuracy: 0.9770 - val_loss: 0.2843 - val_accuracy: 0.9215
313/313 [==============================] - 2s 4ms/step - loss: 0.2402 - accuracy: 0.9223
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
#(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
#fashion = keras.datasets.fashion_mnist
#(train_images, train_labels), (test_images, test_labels) = fashion.load_data()
(train_images, train_labels), (test_images, test_labels) = keras.datasets.cifar10.load_data()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)
#하면 (50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)로,
#3channel(RGB)임을 알 수 있다.
plt.figure(figsize=(15,15))
plt.plot(5,5)
for i in range(25) :
plt.subplot(5,5,i+1)
plt.imshow(train_images[i])
plt.xticks([])
plt.yticks([])
plt.xlabel(f'This is {train_labels[i]}')
plt.ylabel('32x32 image')
plt.title('This is cifar10 image')
plt.show()
#32 x 32 이미지라서 이미지 화질 구리다.
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz 170500096/170498071 [==============================] - 5s 0us/step 170508288/170498071 [==============================] - 5s 0us/step (50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 99 )
#stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
print(train_images.shape)
print(train_images.shape[0])
(40000, 32, 32, 3) 40000
#이미 3D니까 reshape할 필요 없다.
"""
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1)
test_images = test_images.reshape(test_images.shape[0], 28, 28, 1)
print(train_images.shape)
print(test_images.shape)
"""
'\ntrain_images = train_images.reshape(train_images.shape[0], 28, 28, 1) \ntest_images = test_images.reshape(test_images.shape[0], 28, 28, 1)\n\nprint(train_images.shape)\nprint(test_images.shape)\n'
#Modeling
input = keras.layers.Input(shape=(32,32,3), name='Input_layer')
x = keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', name='1stCV')(input)
x1 = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='1stMP')(x)
x = keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', name='2ndCV')(x1)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Flatten(name='Flatten')(x)
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
model1 = keras.models.Model(inputs=input, outputs=output)
print(model1.summary())
'''
first_cv = model1.get_layer('1stCV').output
second_cv = model1.get_layer('2ndCV').output
model2 = keras.models.Model(inputs = input, outputs = second_cv)
print(model2. summary())
model3 = keras.models.Model(inputs = input, outputs = [first_cv, second_cv] ) #일케하면 multi output (이경우 2output) 임.
print(model3. summary())
input_image = test_images[0]
input_image = input_image.reshape(1,28, 28,1)
conv_outputs = model2(input_image)
#4D로 만들어줘야 한다. batch 자리도 만들어줘야 함. (1, 28, 28, 1)의 사이즈로 만들어야 함.
conv_outputs = conv_outputs.numpy()
conv_outputs = conv_outputs.reshape(14,14,64)
plt.figure(figsize=(12,12))
for i in range(64) :
plt.subplot(8,8,i+1)
plt.imshow(conv_outputs[:,:,i], cmap='gray' )
plt.xticks([])
plt.yticks([])
plt.show()
'''
model1.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
#'adam' or gradient descent method
path = 'best_model1.h5'
modelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)
earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
history = model1.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), callbacks=[modelCheckpoint, earlyStopping], epochs=1000)
model1.load_weights('best_model1.h5')
loss, acc = model1.evaluate(test_images, test_labels)
plt.plot(history.history['loss'], label='Train loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
plt.plot(history.history['accuracy'], label='Train accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Input_layer (InputLayer) [(None, 32, 32, 3)] 0
1stCV (Conv2D) (None, 32, 32, 32) 896
1stMP (MaxPooling2D) (None, 16, 16, 32) 0
2ndCV (Conv2D) (None, 16, 16, 64) 18496
2ndMP (MaxPooling2D) (None, 8, 8, 64) 0
Flatten (Flatten) (None, 4096) 0
1stFC (Dense) (None, 128) 524416
2ndFC (Dense) (None, 64) 8256
lastFC (Dense) (None, 10) 650
=================================================================
Total params: 552,714
Trainable params: 552,714
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/1000
313/313 [==============================] - 13s 14ms/step - loss: 1.5896 - accuracy: 0.4236 - val_loss: 1.3285 - val_accuracy: 0.5344
Epoch 2/1000
313/313 [==============================] - 4s 12ms/step - loss: 1.2073 - accuracy: 0.5743 - val_loss: 1.1180 - val_accuracy: 0.6068
Epoch 3/1000
313/313 [==============================] - 4s 12ms/step - loss: 1.0344 - accuracy: 0.6361 - val_loss: 1.0391 - val_accuracy: 0.6358
Epoch 4/1000
313/313 [==============================] - 4s 13ms/step - loss: 0.9308 - accuracy: 0.6763 - val_loss: 0.9936 - val_accuracy: 0.6537
Epoch 5/1000
313/313 [==============================] - 4s 13ms/step - loss: 0.8520 - accuracy: 0.7014 - val_loss: 0.9763 - val_accuracy: 0.6592
Epoch 6/1000
313/313 [==============================] - 4s 13ms/step - loss: 0.7852 - accuracy: 0.7271 - val_loss: 0.9239 - val_accuracy: 0.6818
Epoch 7/1000
313/313 [==============================] - 4s 13ms/step - loss: 0.7215 - accuracy: 0.7473 - val_loss: 0.9071 - val_accuracy: 0.6846
Epoch 8/1000
313/313 [==============================] - 4s 13ms/step - loss: 0.6633 - accuracy: 0.7676 - val_loss: 0.9568 - val_accuracy: 0.6797
Epoch 9/1000
313/313 [==============================] - 4s 12ms/step - loss: 0.6091 - accuracy: 0.7875 - val_loss: 0.9054 - val_accuracy: 0.6947
Epoch 10/1000
313/313 [==============================] - 4s 13ms/step - loss: 0.5515 - accuracy: 0.8067 - val_loss: 0.9267 - val_accuracy: 0.6971
Epoch 11/1000
313/313 [==============================] - 4s 12ms/step - loss: 0.5000 - accuracy: 0.8243 - val_loss: 0.9358 - val_accuracy: 0.6935
Epoch 12/1000
313/313 [==============================] - 4s 13ms/step - loss: 0.4508 - accuracy: 0.8419 - val_loss: 1.0333 - val_accuracy: 0.6809
Epoch 13/1000
313/313 [==============================] - 4s 12ms/step - loss: 0.3979 - accuracy: 0.8620 - val_loss: 1.0673 - val_accuracy: 0.6845
Epoch 14/1000
313/313 [==============================] - 4s 12ms/step - loss: 0.3389 - accuracy: 0.8827 - val_loss: 1.0705 - val_accuracy: 0.6937
313/313 [==============================] - 2s 5ms/step - loss: 0.8917 - accuracy: 0.7041
이제 layer 수 증가시켜서 더 complicate한 모델 만들자...
"""
#Modeling
input = keras.layers.Input(shape=(32,32,3), name='Input_layer')
x = keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', name='1stCV')(input)
x1 = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='1stMP')(x)
x = keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', name='2ndCV')(x1)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Conv2D(filters=128, kernel_size=(5,5), padding='same', activation='relu', name='2ndCV')(x1)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Conv2D(filters=128, kernel_size=(5,5), padding='same', activation='relu', name='2ndCV')(x1)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Flatten(name='Flatten')(x)
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
model1 = keras.models.Model(inputs=input, outputs=output)
print(model1.summary())
"""
근데 이거 넘 optimize하기 힘든 거 아니냐?
transfer learning : resnet50의 model 앞에꺼만 베끼고 뒤에 flatten부터 FC는 사용자가 조절해서 쓰는거
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.resnet50 import ResNet50 #152 중 50 layer 쓴다(?) bring trained model
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
#(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
#fashion = keras.datasets.fashion_mnist
#(train_images, train_labels), (test_images, test_labels) = fashion.load_data()
(train_images, train_labels), (test_images, test_labels) = keras.datasets.cifar10.load_data()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)
#하면 (50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)로,
#3channel(RGB)임을 알 수 있다.
plt.figure(figsize=(15,15))
plt.plot(5,5)
for i in range(25) :
plt.subplot(5,5,i+1)
plt.imshow(train_images[i])
plt.xticks([])
plt.yticks([])
plt.xlabel(f'This is {train_labels[i]}')
plt.ylabel('32x32 image')
plt.title('This is cifar10 image')
plt.show()
#32 x 32 이미지라서 이미지 화질 구리다.
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 99 )
#stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
print(train_images.shape)
print(train_images.shape[0])
#Modeling
base_model = ResNet50( include_top=False, input_shape=(32,32,3) )
#우리는 10개 output 필요한데, 1000개 제공하므로 FC(fully connected layer)(top) 필요 없다.
#weights = 'imagent'이란 파라미터는 그 대회에서 쓴 거가 default
input = base_model.input
x = base_model.output
#Flatten 대신 gap 쓰자
x = keras.layers.GlobalAveragePooling2D()(x)
#FC 추가해주자.
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
"""
input = keras.layers.Input(shape=(32,32,3), name='Input_layer')
x = keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', name='1stCV')(input)
x1 = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='1stMP')(x)
x = keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', name='2ndCV')(x1)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Flatten(name='Flatten')(x)
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
"""
model1 = keras.models.Model(inputs=input, outputs=output)
print(model1.summary())
'''
first_cv = model1.get_layer('1stCV').output
second_cv = model1.get_layer('2ndCV').output
model2 = keras.models.Model(inputs = input, outputs = second_cv)
print(model2. summary())
model3 = keras.models.Model(inputs = input, outputs = [first_cv, second_cv] ) #일케하면 multi output (이경우 2output) 임.
print(model3. summary())
input_image = test_images[0]
input_image = input_image.reshape(1,28, 28,1)
conv_outputs = model2(input_image)
#4D로 만들어줘야 한다. batch 자리도 만들어줘야 함. (1, 28, 28, 1)의 사이즈로 만들어야 함.
conv_outputs = conv_outputs.numpy()
conv_outputs = conv_outputs.reshape(14,14,64)
plt.figure(figsize=(12,12))
for i in range(64) :
plt.subplot(8,8,i+1)
plt.imshow(conv_outputs[:,:,i], cmap='gray' )
plt.xticks([])
plt.yticks([])
plt.show()
'''
model1.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
#'adam' or gradient descent method
path = 'best_model1.h5'
modelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)
earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
history = model1.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), callbacks=[modelCheckpoint, earlyStopping], epochs=1000)
model1.load_weights('best_model1.h5')
loss, acc = model1.evaluate(test_images, test_labels)
plt.plot(history.history['loss'], label='Train loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
plt.plot(history.history['accuracy'], label='Train accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)
(40000, 32, 32, 3)
40000
Model: "model_1"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_2 (InputLayer) [(None, 32, 32, 3)] 0 []
conv1_pad (ZeroPadding2D) (None, 38, 38, 3) 0 ['input_2[0][0]']
conv1_conv (Conv2D) (None, 16, 16, 64) 9472 ['conv1_pad[0][0]']
conv1_bn (BatchNormalization) (None, 16, 16, 64) 256 ['conv1_conv[0][0]']
conv1_relu (Activation) (None, 16, 16, 64) 0 ['conv1_bn[0][0]']
pool1_pad (ZeroPadding2D) (None, 18, 18, 64) 0 ['conv1_relu[0][0]']
pool1_pool (MaxPooling2D) (None, 8, 8, 64) 0 ['pool1_pad[0][0]']
conv2_block1_1_conv (Conv2D) (None, 8, 8, 64) 4160 ['pool1_pool[0][0]']
conv2_block1_1_bn (BatchNormal (None, 8, 8, 64) 256 ['conv2_block1_1_conv[0][0]']
ization)
conv2_block1_1_relu (Activatio (None, 8, 8, 64) 0 ['conv2_block1_1_bn[0][0]']
n)
conv2_block1_2_conv (Conv2D) (None, 8, 8, 64) 36928 ['conv2_block1_1_relu[0][0]']
conv2_block1_2_bn (BatchNormal (None, 8, 8, 64) 256 ['conv2_block1_2_conv[0][0]']
ization)
conv2_block1_2_relu (Activatio (None, 8, 8, 64) 0 ['conv2_block1_2_bn[0][0]']
n)
conv2_block1_0_conv (Conv2D) (None, 8, 8, 256) 16640 ['pool1_pool[0][0]']
conv2_block1_3_conv (Conv2D) (None, 8, 8, 256) 16640 ['conv2_block1_2_relu[0][0]']
conv2_block1_0_bn (BatchNormal (None, 8, 8, 256) 1024 ['conv2_block1_0_conv[0][0]']
ization)
conv2_block1_3_bn (BatchNormal (None, 8, 8, 256) 1024 ['conv2_block1_3_conv[0][0]']
ization)
conv2_block1_add (Add) (None, 8, 8, 256) 0 ['conv2_block1_0_bn[0][0]',
'conv2_block1_3_bn[0][0]']
conv2_block1_out (Activation) (None, 8, 8, 256) 0 ['conv2_block1_add[0][0]']
conv2_block2_1_conv (Conv2D) (None, 8, 8, 64) 16448 ['conv2_block1_out[0][0]']
conv2_block2_1_bn (BatchNormal (None, 8, 8, 64) 256 ['conv2_block2_1_conv[0][0]']
ization)
conv2_block2_1_relu (Activatio (None, 8, 8, 64) 0 ['conv2_block2_1_bn[0][0]']
n)
conv2_block2_2_conv (Conv2D) (None, 8, 8, 64) 36928 ['conv2_block2_1_relu[0][0]']
conv2_block2_2_bn (BatchNormal (None, 8, 8, 64) 256 ['conv2_block2_2_conv[0][0]']
ization)
conv2_block2_2_relu (Activatio (None, 8, 8, 64) 0 ['conv2_block2_2_bn[0][0]']
n)
conv2_block2_3_conv (Conv2D) (None, 8, 8, 256) 16640 ['conv2_block2_2_relu[0][0]']
conv2_block2_3_bn (BatchNormal (None, 8, 8, 256) 1024 ['conv2_block2_3_conv[0][0]']
ization)
conv2_block2_add (Add) (None, 8, 8, 256) 0 ['conv2_block1_out[0][0]',
'conv2_block2_3_bn[0][0]']
conv2_block2_out (Activation) (None, 8, 8, 256) 0 ['conv2_block2_add[0][0]']
conv2_block3_1_conv (Conv2D) (None, 8, 8, 64) 16448 ['conv2_block2_out[0][0]']
conv2_block3_1_bn (BatchNormal (None, 8, 8, 64) 256 ['conv2_block3_1_conv[0][0]']
ization)
conv2_block3_1_relu (Activatio (None, 8, 8, 64) 0 ['conv2_block3_1_bn[0][0]']
n)
conv2_block3_2_conv (Conv2D) (None, 8, 8, 64) 36928 ['conv2_block3_1_relu[0][0]']
conv2_block3_2_bn (BatchNormal (None, 8, 8, 64) 256 ['conv2_block3_2_conv[0][0]']
ization)
conv2_block3_2_relu (Activatio (None, 8, 8, 64) 0 ['conv2_block3_2_bn[0][0]']
n)
conv2_block3_3_conv (Conv2D) (None, 8, 8, 256) 16640 ['conv2_block3_2_relu[0][0]']
conv2_block3_3_bn (BatchNormal (None, 8, 8, 256) 1024 ['conv2_block3_3_conv[0][0]']
ization)
conv2_block3_add (Add) (None, 8, 8, 256) 0 ['conv2_block2_out[0][0]',
'conv2_block3_3_bn[0][0]']
conv2_block3_out (Activation) (None, 8, 8, 256) 0 ['conv2_block3_add[0][0]']
conv3_block1_1_conv (Conv2D) (None, 4, 4, 128) 32896 ['conv2_block3_out[0][0]']
conv3_block1_1_bn (BatchNormal (None, 4, 4, 128) 512 ['conv3_block1_1_conv[0][0]']
ization)
conv3_block1_1_relu (Activatio (None, 4, 4, 128) 0 ['conv3_block1_1_bn[0][0]']
n)
conv3_block1_2_conv (Conv2D) (None, 4, 4, 128) 147584 ['conv3_block1_1_relu[0][0]']
conv3_block1_2_bn (BatchNormal (None, 4, 4, 128) 512 ['conv3_block1_2_conv[0][0]']
ization)
conv3_block1_2_relu (Activatio (None, 4, 4, 128) 0 ['conv3_block1_2_bn[0][0]']
n)
conv3_block1_0_conv (Conv2D) (None, 4, 4, 512) 131584 ['conv2_block3_out[0][0]']
conv3_block1_3_conv (Conv2D) (None, 4, 4, 512) 66048 ['conv3_block1_2_relu[0][0]']
conv3_block1_0_bn (BatchNormal (None, 4, 4, 512) 2048 ['conv3_block1_0_conv[0][0]']
ization)
conv3_block1_3_bn (BatchNormal (None, 4, 4, 512) 2048 ['conv3_block1_3_conv[0][0]']
ization)
conv3_block1_add (Add) (None, 4, 4, 512) 0 ['conv3_block1_0_bn[0][0]',
'conv3_block1_3_bn[0][0]']
conv3_block1_out (Activation) (None, 4, 4, 512) 0 ['conv3_block1_add[0][0]']
conv3_block2_1_conv (Conv2D) (None, 4, 4, 128) 65664 ['conv3_block1_out[0][0]']
conv3_block2_1_bn (BatchNormal (None, 4, 4, 128) 512 ['conv3_block2_1_conv[0][0]']
ization)
conv3_block2_1_relu (Activatio (None, 4, 4, 128) 0 ['conv3_block2_1_bn[0][0]']
n)
conv3_block2_2_conv (Conv2D) (None, 4, 4, 128) 147584 ['conv3_block2_1_relu[0][0]']
conv3_block2_2_bn (BatchNormal (None, 4, 4, 128) 512 ['conv3_block2_2_conv[0][0]']
ization)
conv3_block2_2_relu (Activatio (None, 4, 4, 128) 0 ['conv3_block2_2_bn[0][0]']
n)
conv3_block2_3_conv (Conv2D) (None, 4, 4, 512) 66048 ['conv3_block2_2_relu[0][0]']
conv3_block2_3_bn (BatchNormal (None, 4, 4, 512) 2048 ['conv3_block2_3_conv[0][0]']
ization)
conv3_block2_add (Add) (None, 4, 4, 512) 0 ['conv3_block1_out[0][0]',
'conv3_block2_3_bn[0][0]']
conv3_block2_out (Activation) (None, 4, 4, 512) 0 ['conv3_block2_add[0][0]']
conv3_block3_1_conv (Conv2D) (None, 4, 4, 128) 65664 ['conv3_block2_out[0][0]']
conv3_block3_1_bn (BatchNormal (None, 4, 4, 128) 512 ['conv3_block3_1_conv[0][0]']
ization)
conv3_block3_1_relu (Activatio (None, 4, 4, 128) 0 ['conv3_block3_1_bn[0][0]']
n)
conv3_block3_2_conv (Conv2D) (None, 4, 4, 128) 147584 ['conv3_block3_1_relu[0][0]']
conv3_block3_2_bn (BatchNormal (None, 4, 4, 128) 512 ['conv3_block3_2_conv[0][0]']
ization)
conv3_block3_2_relu (Activatio (None, 4, 4, 128) 0 ['conv3_block3_2_bn[0][0]']
n)
conv3_block3_3_conv (Conv2D) (None, 4, 4, 512) 66048 ['conv3_block3_2_relu[0][0]']
conv3_block3_3_bn (BatchNormal (None, 4, 4, 512) 2048 ['conv3_block3_3_conv[0][0]']
ization)
conv3_block3_add (Add) (None, 4, 4, 512) 0 ['conv3_block2_out[0][0]',
'conv3_block3_3_bn[0][0]']
conv3_block3_out (Activation) (None, 4, 4, 512) 0 ['conv3_block3_add[0][0]']
conv3_block4_1_conv (Conv2D) (None, 4, 4, 128) 65664 ['conv3_block3_out[0][0]']
conv3_block4_1_bn (BatchNormal (None, 4, 4, 128) 512 ['conv3_block4_1_conv[0][0]']
ization)
conv3_block4_1_relu (Activatio (None, 4, 4, 128) 0 ['conv3_block4_1_bn[0][0]']
n)
conv3_block4_2_conv (Conv2D) (None, 4, 4, 128) 147584 ['conv3_block4_1_relu[0][0]']
conv3_block4_2_bn (BatchNormal (None, 4, 4, 128) 512 ['conv3_block4_2_conv[0][0]']
ization)
conv3_block4_2_relu (Activatio (None, 4, 4, 128) 0 ['conv3_block4_2_bn[0][0]']
n)
conv3_block4_3_conv (Conv2D) (None, 4, 4, 512) 66048 ['conv3_block4_2_relu[0][0]']
conv3_block4_3_bn (BatchNormal (None, 4, 4, 512) 2048 ['conv3_block4_3_conv[0][0]']
ization)
conv3_block4_add (Add) (None, 4, 4, 512) 0 ['conv3_block3_out[0][0]',
'conv3_block4_3_bn[0][0]']
conv3_block4_out (Activation) (None, 4, 4, 512) 0 ['conv3_block4_add[0][0]']
conv4_block1_1_conv (Conv2D) (None, 2, 2, 256) 131328 ['conv3_block4_out[0][0]']
conv4_block1_1_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block1_1_conv[0][0]']
ization)
conv4_block1_1_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block1_1_bn[0][0]']
n)
conv4_block1_2_conv (Conv2D) (None, 2, 2, 256) 590080 ['conv4_block1_1_relu[0][0]']
conv4_block1_2_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block1_2_conv[0][0]']
ization)
conv4_block1_2_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block1_2_bn[0][0]']
n)
conv4_block1_0_conv (Conv2D) (None, 2, 2, 1024) 525312 ['conv3_block4_out[0][0]']
conv4_block1_3_conv (Conv2D) (None, 2, 2, 1024) 263168 ['conv4_block1_2_relu[0][0]']
conv4_block1_0_bn (BatchNormal (None, 2, 2, 1024) 4096 ['conv4_block1_0_conv[0][0]']
ization)
conv4_block1_3_bn (BatchNormal (None, 2, 2, 1024) 4096 ['conv4_block1_3_conv[0][0]']
ization)
conv4_block1_add (Add) (None, 2, 2, 1024) 0 ['conv4_block1_0_bn[0][0]',
'conv4_block1_3_bn[0][0]']
conv4_block1_out (Activation) (None, 2, 2, 1024) 0 ['conv4_block1_add[0][0]']
conv4_block2_1_conv (Conv2D) (None, 2, 2, 256) 262400 ['conv4_block1_out[0][0]']
conv4_block2_1_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block2_1_conv[0][0]']
ization)
conv4_block2_1_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block2_1_bn[0][0]']
n)
conv4_block2_2_conv (Conv2D) (None, 2, 2, 256) 590080 ['conv4_block2_1_relu[0][0]']
conv4_block2_2_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block2_2_conv[0][0]']
ization)
conv4_block2_2_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block2_2_bn[0][0]']
n)
conv4_block2_3_conv (Conv2D) (None, 2, 2, 1024) 263168 ['conv4_block2_2_relu[0][0]']
conv4_block2_3_bn (BatchNormal (None, 2, 2, 1024) 4096 ['conv4_block2_3_conv[0][0]']
ization)
conv4_block2_add (Add) (None, 2, 2, 1024) 0 ['conv4_block1_out[0][0]',
'conv4_block2_3_bn[0][0]']
conv4_block2_out (Activation) (None, 2, 2, 1024) 0 ['conv4_block2_add[0][0]']
conv4_block3_1_conv (Conv2D) (None, 2, 2, 256) 262400 ['conv4_block2_out[0][0]']
conv4_block3_1_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block3_1_conv[0][0]']
ization)
conv4_block3_1_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block3_1_bn[0][0]']
n)
conv4_block3_2_conv (Conv2D) (None, 2, 2, 256) 590080 ['conv4_block3_1_relu[0][0]']
conv4_block3_2_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block3_2_conv[0][0]']
ization)
conv4_block3_2_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block3_2_bn[0][0]']
n)
conv4_block3_3_conv (Conv2D) (None, 2, 2, 1024) 263168 ['conv4_block3_2_relu[0][0]']
conv4_block3_3_bn (BatchNormal (None, 2, 2, 1024) 4096 ['conv4_block3_3_conv[0][0]']
ization)
conv4_block3_add (Add) (None, 2, 2, 1024) 0 ['conv4_block2_out[0][0]',
'conv4_block3_3_bn[0][0]']
conv4_block3_out (Activation) (None, 2, 2, 1024) 0 ['conv4_block3_add[0][0]']
conv4_block4_1_conv (Conv2D) (None, 2, 2, 256) 262400 ['conv4_block3_out[0][0]']
conv4_block4_1_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block4_1_conv[0][0]']
ization)
conv4_block4_1_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block4_1_bn[0][0]']
n)
conv4_block4_2_conv (Conv2D) (None, 2, 2, 256) 590080 ['conv4_block4_1_relu[0][0]']
conv4_block4_2_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block4_2_conv[0][0]']
ization)
conv4_block4_2_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block4_2_bn[0][0]']
n)
conv4_block4_3_conv (Conv2D) (None, 2, 2, 1024) 263168 ['conv4_block4_2_relu[0][0]']
conv4_block4_3_bn (BatchNormal (None, 2, 2, 1024) 4096 ['conv4_block4_3_conv[0][0]']
ization)
conv4_block4_add (Add) (None, 2, 2, 1024) 0 ['conv4_block3_out[0][0]',
'conv4_block4_3_bn[0][0]']
conv4_block4_out (Activation) (None, 2, 2, 1024) 0 ['conv4_block4_add[0][0]']
conv4_block5_1_conv (Conv2D) (None, 2, 2, 256) 262400 ['conv4_block4_out[0][0]']
conv4_block5_1_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block5_1_conv[0][0]']
ization)
conv4_block5_1_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block5_1_bn[0][0]']
n)
conv4_block5_2_conv (Conv2D) (None, 2, 2, 256) 590080 ['conv4_block5_1_relu[0][0]']
conv4_block5_2_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block5_2_conv[0][0]']
ization)
conv4_block5_2_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block5_2_bn[0][0]']
n)
conv4_block5_3_conv (Conv2D) (None, 2, 2, 1024) 263168 ['conv4_block5_2_relu[0][0]']
conv4_block5_3_bn (BatchNormal (None, 2, 2, 1024) 4096 ['conv4_block5_3_conv[0][0]']
ization)
conv4_block5_add (Add) (None, 2, 2, 1024) 0 ['conv4_block4_out[0][0]',
'conv4_block5_3_bn[0][0]']
conv4_block5_out (Activation) (None, 2, 2, 1024) 0 ['conv4_block5_add[0][0]']
conv4_block6_1_conv (Conv2D) (None, 2, 2, 256) 262400 ['conv4_block5_out[0][0]']
conv4_block6_1_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block6_1_conv[0][0]']
ization)
conv4_block6_1_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block6_1_bn[0][0]']
n)
conv4_block6_2_conv (Conv2D) (None, 2, 2, 256) 590080 ['conv4_block6_1_relu[0][0]']
conv4_block6_2_bn (BatchNormal (None, 2, 2, 256) 1024 ['conv4_block6_2_conv[0][0]']
ization)
conv4_block6_2_relu (Activatio (None, 2, 2, 256) 0 ['conv4_block6_2_bn[0][0]']
n)
conv4_block6_3_conv (Conv2D) (None, 2, 2, 1024) 263168 ['conv4_block6_2_relu[0][0]']
conv4_block6_3_bn (BatchNormal (None, 2, 2, 1024) 4096 ['conv4_block6_3_conv[0][0]']
ization)
conv4_block6_add (Add) (None, 2, 2, 1024) 0 ['conv4_block5_out[0][0]',
'conv4_block6_3_bn[0][0]']
conv4_block6_out (Activation) (None, 2, 2, 1024) 0 ['conv4_block6_add[0][0]']
conv5_block1_1_conv (Conv2D) (None, 1, 1, 512) 524800 ['conv4_block6_out[0][0]']
conv5_block1_1_bn (BatchNormal (None, 1, 1, 512) 2048 ['conv5_block1_1_conv[0][0]']
ization)
conv5_block1_1_relu (Activatio (None, 1, 1, 512) 0 ['conv5_block1_1_bn[0][0]']
n)
conv5_block1_2_conv (Conv2D) (None, 1, 1, 512) 2359808 ['conv5_block1_1_relu[0][0]']
conv5_block1_2_bn (BatchNormal (None, 1, 1, 512) 2048 ['conv5_block1_2_conv[0][0]']
ization)
conv5_block1_2_relu (Activatio (None, 1, 1, 512) 0 ['conv5_block1_2_bn[0][0]']
n)
conv5_block1_0_conv (Conv2D) (None, 1, 1, 2048) 2099200 ['conv4_block6_out[0][0]']
conv5_block1_3_conv (Conv2D) (None, 1, 1, 2048) 1050624 ['conv5_block1_2_relu[0][0]']
conv5_block1_0_bn (BatchNormal (None, 1, 1, 2048) 8192 ['conv5_block1_0_conv[0][0]']
ization)
conv5_block1_3_bn (BatchNormal (None, 1, 1, 2048) 8192 ['conv5_block1_3_conv[0][0]']
ization)
conv5_block1_add (Add) (None, 1, 1, 2048) 0 ['conv5_block1_0_bn[0][0]',
'conv5_block1_3_bn[0][0]']
conv5_block1_out (Activation) (None, 1, 1, 2048) 0 ['conv5_block1_add[0][0]']
conv5_block2_1_conv (Conv2D) (None, 1, 1, 512) 1049088 ['conv5_block1_out[0][0]']
conv5_block2_1_bn (BatchNormal (None, 1, 1, 512) 2048 ['conv5_block2_1_conv[0][0]']
ization)
conv5_block2_1_relu (Activatio (None, 1, 1, 512) 0 ['conv5_block2_1_bn[0][0]']
n)
conv5_block2_2_conv (Conv2D) (None, 1, 1, 512) 2359808 ['conv5_block2_1_relu[0][0]']
conv5_block2_2_bn (BatchNormal (None, 1, 1, 512) 2048 ['conv5_block2_2_conv[0][0]']
ization)
conv5_block2_2_relu (Activatio (None, 1, 1, 512) 0 ['conv5_block2_2_bn[0][0]']
n)
conv5_block2_3_conv (Conv2D) (None, 1, 1, 2048) 1050624 ['conv5_block2_2_relu[0][0]']
conv5_block2_3_bn (BatchNormal (None, 1, 1, 2048) 8192 ['conv5_block2_3_conv[0][0]']
ization)
conv5_block2_add (Add) (None, 1, 1, 2048) 0 ['conv5_block1_out[0][0]',
'conv5_block2_3_bn[0][0]']
conv5_block2_out (Activation) (None, 1, 1, 2048) 0 ['conv5_block2_add[0][0]']
conv5_block3_1_conv (Conv2D) (None, 1, 1, 512) 1049088 ['conv5_block2_out[0][0]']
conv5_block3_1_bn (BatchNormal (None, 1, 1, 512) 2048 ['conv5_block3_1_conv[0][0]']
ization)
conv5_block3_1_relu (Activatio (None, 1, 1, 512) 0 ['conv5_block3_1_bn[0][0]']
n)
conv5_block3_2_conv (Conv2D) (None, 1, 1, 512) 2359808 ['conv5_block3_1_relu[0][0]']
conv5_block3_2_bn (BatchNormal (None, 1, 1, 512) 2048 ['conv5_block3_2_conv[0][0]']
ization)
conv5_block3_2_relu (Activatio (None, 1, 1, 512) 0 ['conv5_block3_2_bn[0][0]']
n)
conv5_block3_3_conv (Conv2D) (None, 1, 1, 2048) 1050624 ['conv5_block3_2_relu[0][0]']
conv5_block3_3_bn (BatchNormal (None, 1, 1, 2048) 8192 ['conv5_block3_3_conv[0][0]']
ization)
conv5_block3_add (Add) (None, 1, 1, 2048) 0 ['conv5_block2_out[0][0]',
'conv5_block3_3_bn[0][0]']
conv5_block3_out (Activation) (None, 1, 1, 2048) 0 ['conv5_block3_add[0][0]']
global_average_pooling2d (Glob (None, 2048) 0 ['conv5_block3_out[0][0]']
alAveragePooling2D)
1stFC (Dense) (None, 128) 262272 ['global_average_pooling2d[0][0]'
]
2ndFC (Dense) (None, 64) 8256 ['1stFC[0][0]']
lastFC (Dense) (None, 10) 650 ['2ndFC[0][0]']
==================================================================================================
Total params: 23,858,890
Trainable params: 23,805,770
Non-trainable params: 53,120
__________________________________________________________________________________________________
None
Epoch 1/1000
313/313 [==============================] - ETA: 0s - loss: 1.1218 - accuracy: 0.6265
/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument. layer_config = serialize_layer_fn(layer)
313/313 [==============================] - 65s 177ms/step - loss: 1.1218 - accuracy: 0.6265 - val_loss: 3.3489 - val_accuracy: 0.1002 Epoch 2/1000 313/313 [==============================] - 50s 160ms/step - loss: 0.7030 - accuracy: 0.7676 - val_loss: 3.3082 - val_accuracy: 0.1956 Epoch 3/1000 313/313 [==============================] - 50s 161ms/step - loss: 0.6081 - accuracy: 0.7982 - val_loss: 1.9134 - val_accuracy: 0.4667 Epoch 4/1000 313/313 [==============================] - 52s 167ms/step - loss: 0.4627 - accuracy: 0.8450 - val_loss: 0.8296 - val_accuracy: 0.7464 Epoch 5/1000 313/313 [==============================] - 50s 161ms/step - loss: 0.3941 - accuracy: 0.8687 - val_loss: 0.7520 - val_accuracy: 0.7715 Epoch 6/1000 313/313 [==============================] - 49s 156ms/step - loss: 0.3236 - accuracy: 0.8903 - val_loss: 0.8680 - val_accuracy: 0.7345 Epoch 7/1000 313/313 [==============================] - 49s 156ms/step - loss: 0.2738 - accuracy: 0.9095 - val_loss: 1.3141 - val_accuracy: 0.6630 Epoch 8/1000 313/313 [==============================] - 51s 162ms/step - loss: 0.2870 - accuracy: 0.9075 - val_loss: 2.9562 - val_accuracy: 0.5808 Epoch 9/1000 313/313 [==============================] - 49s 156ms/step - loss: 0.2482 - accuracy: 0.9186 - val_loss: 0.7841 - val_accuracy: 0.7866 Epoch 10/1000 313/313 [==============================] - 49s 156ms/step - loss: 0.1625 - accuracy: 0.9463 - val_loss: 1.0391 - val_accuracy: 0.7380 313/313 [==============================] - 10s 30ms/step - loss: 0.7675 - accuracy: 0.7639
요게 ResNet50의 output이다.
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.resnet50 import ResNet50 #152 중 50 layer 쓴다(?) bring trained model
import tensorflow as tf #✨
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
#(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
#fashion = keras.datasets.fashion_mnist
#(train_images, train_labels), (test_images, test_labels) = fashion.load_data()
(train_images, train_labels), (test_images, test_labels) = keras.datasets.cifar10.load_data()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)
#하면 (50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)로,
#3channel(RGB)임을 알 수 있다.
"""
plt.figure(figsize=(15,15))
plt.plot(5,5)
for i in range(25) :
plt.subplot(5,5,i+1)
plt.imshow(train_images[i])
plt.xticks([])
plt.yticks([])
plt.xlabel(f'This is {train_labels[i]}')
plt.ylabel('32x32 image')
plt.title('This is cifar10 image')
plt.show()
"""
#32 x 32 이미지라서 이미지 화질 구리다.
#✨
#이미지 크지 조정 : 정확도 올라감. 이미지 확대
#이렇게 tf 쓰는거 말고도 OpenCV 등의 방식 있다.
#32by32에서 점점 작아지는 거보다
#더 크게 해놓고 3by3 으로 줄이는 게 more parameter라서 정확도 좋다.
#train_images = tf.image.resize(images=train_images, size=(75, 75)) 이건 type이 numpy므로 tensor로 바꿔주자.
train_images = tf.image.resize(images=train_images, size=(75, 75)).numpy()
test_images = tf.image.resize(images=test_images, size=(75, 75)).numpy()
#✨
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 99 )
#stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
print(train_images.shape)
print(train_images.shape[0])
#Modeling
base_model = ResNet50( include_top=False, input_shape=(75,75,3) ) #✨
#우리는 10개 output 필요한데, 1000개 제공하므로 FC(fully connected layer)(top) 필요 없다.
#weights = 'imagent'이란 파라미터는 그 대회에서 쓴 거가 default
input = base_model.input
x = base_model.output
#Flatten 대신 gap 쓰자
x = keras.layers.GlobalAveragePooling2D()(x)
#FC 추가해주자.
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
"""
input = keras.layers.Input(shape=(32,32,3), name='Input_layer')
x = keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', name='1stCV')(input)
x1 = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='1stMP')(x)
x = keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', name='2ndCV')(x1)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Flatten(name='Flatten')(x)
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
"""
model1 = keras.models.Model(inputs=input, outputs=output)
print(model1.summary())
'''
first_cv = model1.get_layer('1stCV').output
second_cv = model1.get_layer('2ndCV').output
model2 = keras.models.Model(inputs = input, outputs = second_cv)
print(model2. summary())
model3 = keras.models.Model(inputs = input, outputs = [first_cv, second_cv] ) #일케하면 multi output (이경우 2output) 임.
print(model3. summary())
input_image = test_images[0]
input_image = input_image.reshape(1,28, 28,1)
conv_outputs = model2(input_image)
#4D로 만들어줘야 한다. batch 자리도 만들어줘야 함. (1, 28, 28, 1)의 사이즈로 만들어야 함.
conv_outputs = conv_outputs.numpy()
conv_outputs = conv_outputs.reshape(14,14,64)
plt.figure(figsize=(12,12))
for i in range(64) :
plt.subplot(8,8,i+1)
plt.imshow(conv_outputs[:,:,i], cmap='gray' )
plt.xticks([])
plt.yticks([])
plt.show()
'''
model1.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
#'adam' or gradient descent method
path = 'best_model1.h5'
modelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)
earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
history = model1.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), callbacks=[modelCheckpoint, earlyStopping], epochs=1000)
model1.load_weights('best_model1.h5')
loss, acc = model1.evaluate(test_images, test_labels)
plt.plot(history.history['loss'], label='Train loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
plt.plot(history.history['accuracy'], label='Train accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)
(40000, 75, 75, 3)
40000
Model: "model_2"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_3 (InputLayer) [(None, 75, 75, 3)] 0 []
conv1_pad (ZeroPadding2D) (None, 81, 81, 3) 0 ['input_3[0][0]']
conv1_conv (Conv2D) (None, 38, 38, 64) 9472 ['conv1_pad[0][0]']
conv1_bn (BatchNormalization) (None, 38, 38, 64) 256 ['conv1_conv[0][0]']
conv1_relu (Activation) (None, 38, 38, 64) 0 ['conv1_bn[0][0]']
pool1_pad (ZeroPadding2D) (None, 40, 40, 64) 0 ['conv1_relu[0][0]']
pool1_pool (MaxPooling2D) (None, 19, 19, 64) 0 ['pool1_pad[0][0]']
conv2_block1_1_conv (Conv2D) (None, 19, 19, 64) 4160 ['pool1_pool[0][0]']
conv2_block1_1_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block1_1_conv[0][0]']
ization)
conv2_block1_1_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block1_1_bn[0][0]']
n)
conv2_block1_2_conv (Conv2D) (None, 19, 19, 64) 36928 ['conv2_block1_1_relu[0][0]']
conv2_block1_2_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block1_2_conv[0][0]']
ization)
conv2_block1_2_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block1_2_bn[0][0]']
n)
conv2_block1_0_conv (Conv2D) (None, 19, 19, 256) 16640 ['pool1_pool[0][0]']
conv2_block1_3_conv (Conv2D) (None, 19, 19, 256) 16640 ['conv2_block1_2_relu[0][0]']
conv2_block1_0_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block1_0_conv[0][0]']
ization)
conv2_block1_3_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block1_3_conv[0][0]']
ization)
conv2_block1_add (Add) (None, 19, 19, 256) 0 ['conv2_block1_0_bn[0][0]',
'conv2_block1_3_bn[0][0]']
conv2_block1_out (Activation) (None, 19, 19, 256) 0 ['conv2_block1_add[0][0]']
conv2_block2_1_conv (Conv2D) (None, 19, 19, 64) 16448 ['conv2_block1_out[0][0]']
conv2_block2_1_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block2_1_conv[0][0]']
ization)
conv2_block2_1_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block2_1_bn[0][0]']
n)
conv2_block2_2_conv (Conv2D) (None, 19, 19, 64) 36928 ['conv2_block2_1_relu[0][0]']
conv2_block2_2_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block2_2_conv[0][0]']
ization)
conv2_block2_2_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block2_2_bn[0][0]']
n)
conv2_block2_3_conv (Conv2D) (None, 19, 19, 256) 16640 ['conv2_block2_2_relu[0][0]']
conv2_block2_3_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block2_3_conv[0][0]']
ization)
conv2_block2_add (Add) (None, 19, 19, 256) 0 ['conv2_block1_out[0][0]',
'conv2_block2_3_bn[0][0]']
conv2_block2_out (Activation) (None, 19, 19, 256) 0 ['conv2_block2_add[0][0]']
conv2_block3_1_conv (Conv2D) (None, 19, 19, 64) 16448 ['conv2_block2_out[0][0]']
conv2_block3_1_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block3_1_conv[0][0]']
ization)
conv2_block3_1_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block3_1_bn[0][0]']
n)
conv2_block3_2_conv (Conv2D) (None, 19, 19, 64) 36928 ['conv2_block3_1_relu[0][0]']
conv2_block3_2_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block3_2_conv[0][0]']
ization)
conv2_block3_2_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block3_2_bn[0][0]']
n)
conv2_block3_3_conv (Conv2D) (None, 19, 19, 256) 16640 ['conv2_block3_2_relu[0][0]']
conv2_block3_3_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block3_3_conv[0][0]']
ization)
conv2_block3_add (Add) (None, 19, 19, 256) 0 ['conv2_block2_out[0][0]',
'conv2_block3_3_bn[0][0]']
conv2_block3_out (Activation) (None, 19, 19, 256) 0 ['conv2_block3_add[0][0]']
conv3_block1_1_conv (Conv2D) (None, 10, 10, 128) 32896 ['conv2_block3_out[0][0]']
conv3_block1_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block1_1_conv[0][0]']
ization)
conv3_block1_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block1_1_bn[0][0]']
n)
conv3_block1_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block1_1_relu[0][0]']
conv3_block1_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block1_2_conv[0][0]']
ization)
conv3_block1_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block1_2_bn[0][0]']
n)
conv3_block1_0_conv (Conv2D) (None, 10, 10, 512) 131584 ['conv2_block3_out[0][0]']
conv3_block1_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block1_2_relu[0][0]']
conv3_block1_0_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block1_0_conv[0][0]']
ization)
conv3_block1_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block1_3_conv[0][0]']
ization)
conv3_block1_add (Add) (None, 10, 10, 512) 0 ['conv3_block1_0_bn[0][0]',
'conv3_block1_3_bn[0][0]']
conv3_block1_out (Activation) (None, 10, 10, 512) 0 ['conv3_block1_add[0][0]']
conv3_block2_1_conv (Conv2D) (None, 10, 10, 128) 65664 ['conv3_block1_out[0][0]']
conv3_block2_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block2_1_conv[0][0]']
ization)
conv3_block2_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block2_1_bn[0][0]']
n)
conv3_block2_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block2_1_relu[0][0]']
conv3_block2_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block2_2_conv[0][0]']
ization)
conv3_block2_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block2_2_bn[0][0]']
n)
conv3_block2_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block2_2_relu[0][0]']
conv3_block2_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block2_3_conv[0][0]']
ization)
conv3_block2_add (Add) (None, 10, 10, 512) 0 ['conv3_block1_out[0][0]',
'conv3_block2_3_bn[0][0]']
conv3_block2_out (Activation) (None, 10, 10, 512) 0 ['conv3_block2_add[0][0]']
conv3_block3_1_conv (Conv2D) (None, 10, 10, 128) 65664 ['conv3_block2_out[0][0]']
conv3_block3_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block3_1_conv[0][0]']
ization)
conv3_block3_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block3_1_bn[0][0]']
n)
conv3_block3_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block3_1_relu[0][0]']
conv3_block3_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block3_2_conv[0][0]']
ization)
conv3_block3_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block3_2_bn[0][0]']
n)
conv3_block3_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block3_2_relu[0][0]']
conv3_block3_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block3_3_conv[0][0]']
ization)
conv3_block3_add (Add) (None, 10, 10, 512) 0 ['conv3_block2_out[0][0]',
'conv3_block3_3_bn[0][0]']
conv3_block3_out (Activation) (None, 10, 10, 512) 0 ['conv3_block3_add[0][0]']
conv3_block4_1_conv (Conv2D) (None, 10, 10, 128) 65664 ['conv3_block3_out[0][0]']
conv3_block4_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block4_1_conv[0][0]']
ization)
conv3_block4_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block4_1_bn[0][0]']
n)
conv3_block4_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block4_1_relu[0][0]']
conv3_block4_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block4_2_conv[0][0]']
ization)
conv3_block4_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block4_2_bn[0][0]']
n)
conv3_block4_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block4_2_relu[0][0]']
conv3_block4_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block4_3_conv[0][0]']
ization)
conv3_block4_add (Add) (None, 10, 10, 512) 0 ['conv3_block3_out[0][0]',
'conv3_block4_3_bn[0][0]']
conv3_block4_out (Activation) (None, 10, 10, 512) 0 ['conv3_block4_add[0][0]']
conv4_block1_1_conv (Conv2D) (None, 5, 5, 256) 131328 ['conv3_block4_out[0][0]']
conv4_block1_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block1_1_conv[0][0]']
ization)
conv4_block1_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block1_1_bn[0][0]']
n)
conv4_block1_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block1_1_relu[0][0]']
conv4_block1_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block1_2_conv[0][0]']
ization)
conv4_block1_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block1_2_bn[0][0]']
n)
conv4_block1_0_conv (Conv2D) (None, 5, 5, 1024) 525312 ['conv3_block4_out[0][0]']
conv4_block1_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block1_2_relu[0][0]']
conv4_block1_0_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block1_0_conv[0][0]']
ization)
conv4_block1_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block1_3_conv[0][0]']
ization)
conv4_block1_add (Add) (None, 5, 5, 1024) 0 ['conv4_block1_0_bn[0][0]',
'conv4_block1_3_bn[0][0]']
conv4_block1_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block1_add[0][0]']
conv4_block2_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block1_out[0][0]']
conv4_block2_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block2_1_conv[0][0]']
ization)
conv4_block2_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block2_1_bn[0][0]']
n)
conv4_block2_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block2_1_relu[0][0]']
conv4_block2_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block2_2_conv[0][0]']
ization)
conv4_block2_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block2_2_bn[0][0]']
n)
conv4_block2_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block2_2_relu[0][0]']
conv4_block2_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block2_3_conv[0][0]']
ization)
conv4_block2_add (Add) (None, 5, 5, 1024) 0 ['conv4_block1_out[0][0]',
'conv4_block2_3_bn[0][0]']
conv4_block2_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block2_add[0][0]']
conv4_block3_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block2_out[0][0]']
conv4_block3_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block3_1_conv[0][0]']
ization)
conv4_block3_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block3_1_bn[0][0]']
n)
conv4_block3_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block3_1_relu[0][0]']
conv4_block3_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block3_2_conv[0][0]']
ization)
conv4_block3_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block3_2_bn[0][0]']
n)
conv4_block3_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block3_2_relu[0][0]']
conv4_block3_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block3_3_conv[0][0]']
ization)
conv4_block3_add (Add) (None, 5, 5, 1024) 0 ['conv4_block2_out[0][0]',
'conv4_block3_3_bn[0][0]']
conv4_block3_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block3_add[0][0]']
conv4_block4_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block3_out[0][0]']
conv4_block4_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block4_1_conv[0][0]']
ization)
conv4_block4_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block4_1_bn[0][0]']
n)
conv4_block4_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block4_1_relu[0][0]']
conv4_block4_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block4_2_conv[0][0]']
ization)
conv4_block4_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block4_2_bn[0][0]']
n)
conv4_block4_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block4_2_relu[0][0]']
conv4_block4_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block4_3_conv[0][0]']
ization)
conv4_block4_add (Add) (None, 5, 5, 1024) 0 ['conv4_block3_out[0][0]',
'conv4_block4_3_bn[0][0]']
conv4_block4_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block4_add[0][0]']
conv4_block5_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block4_out[0][0]']
conv4_block5_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block5_1_conv[0][0]']
ization)
conv4_block5_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block5_1_bn[0][0]']
n)
conv4_block5_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block5_1_relu[0][0]']
conv4_block5_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block5_2_conv[0][0]']
ization)
conv4_block5_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block5_2_bn[0][0]']
n)
conv4_block5_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block5_2_relu[0][0]']
conv4_block5_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block5_3_conv[0][0]']
ization)
conv4_block5_add (Add) (None, 5, 5, 1024) 0 ['conv4_block4_out[0][0]',
'conv4_block5_3_bn[0][0]']
conv4_block5_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block5_add[0][0]']
conv4_block6_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block5_out[0][0]']
conv4_block6_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block6_1_conv[0][0]']
ization)
conv4_block6_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block6_1_bn[0][0]']
n)
conv4_block6_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block6_1_relu[0][0]']
conv4_block6_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block6_2_conv[0][0]']
ization)
conv4_block6_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block6_2_bn[0][0]']
n)
conv4_block6_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block6_2_relu[0][0]']
conv4_block6_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block6_3_conv[0][0]']
ization)
conv4_block6_add (Add) (None, 5, 5, 1024) 0 ['conv4_block5_out[0][0]',
'conv4_block6_3_bn[0][0]']
conv4_block6_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block6_add[0][0]']
conv5_block1_1_conv (Conv2D) (None, 3, 3, 512) 524800 ['conv4_block6_out[0][0]']
conv5_block1_1_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block1_1_conv[0][0]']
ization)
conv5_block1_1_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block1_1_bn[0][0]']
n)
conv5_block1_2_conv (Conv2D) (None, 3, 3, 512) 2359808 ['conv5_block1_1_relu[0][0]']
conv5_block1_2_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block1_2_conv[0][0]']
ization)
conv5_block1_2_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block1_2_bn[0][0]']
n)
conv5_block1_0_conv (Conv2D) (None, 3, 3, 2048) 2099200 ['conv4_block6_out[0][0]']
conv5_block1_3_conv (Conv2D) (None, 3, 3, 2048) 1050624 ['conv5_block1_2_relu[0][0]']
conv5_block1_0_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block1_0_conv[0][0]']
ization)
conv5_block1_3_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block1_3_conv[0][0]']
ization)
conv5_block1_add (Add) (None, 3, 3, 2048) 0 ['conv5_block1_0_bn[0][0]',
'conv5_block1_3_bn[0][0]']
conv5_block1_out (Activation) (None, 3, 3, 2048) 0 ['conv5_block1_add[0][0]']
conv5_block2_1_conv (Conv2D) (None, 3, 3, 512) 1049088 ['conv5_block1_out[0][0]']
conv5_block2_1_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block2_1_conv[0][0]']
ization)
conv5_block2_1_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block2_1_bn[0][0]']
n)
conv5_block2_2_conv (Conv2D) (None, 3, 3, 512) 2359808 ['conv5_block2_1_relu[0][0]']
conv5_block2_2_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block2_2_conv[0][0]']
ization)
conv5_block2_2_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block2_2_bn[0][0]']
n)
conv5_block2_3_conv (Conv2D) (None, 3, 3, 2048) 1050624 ['conv5_block2_2_relu[0][0]']
conv5_block2_3_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block2_3_conv[0][0]']
ization)
conv5_block2_add (Add) (None, 3, 3, 2048) 0 ['conv5_block1_out[0][0]',
'conv5_block2_3_bn[0][0]']
conv5_block2_out (Activation) (None, 3, 3, 2048) 0 ['conv5_block2_add[0][0]']
conv5_block3_1_conv (Conv2D) (None, 3, 3, 512) 1049088 ['conv5_block2_out[0][0]']
conv5_block3_1_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block3_1_conv[0][0]']
ization)
conv5_block3_1_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block3_1_bn[0][0]']
n)
conv5_block3_2_conv (Conv2D) (None, 3, 3, 512) 2359808 ['conv5_block3_1_relu[0][0]']
conv5_block3_2_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block3_2_conv[0][0]']
ization)
conv5_block3_2_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block3_2_bn[0][0]']
n)
conv5_block3_3_conv (Conv2D) (None, 3, 3, 2048) 1050624 ['conv5_block3_2_relu[0][0]']
conv5_block3_3_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block3_3_conv[0][0]']
ization)
conv5_block3_add (Add) (None, 3, 3, 2048) 0 ['conv5_block2_out[0][0]',
'conv5_block3_3_bn[0][0]']
conv5_block3_out (Activation) (None, 3, 3, 2048) 0 ['conv5_block3_add[0][0]']
global_average_pooling2d_1 (Gl (None, 2048) 0 ['conv5_block3_out[0][0]']
obalAveragePooling2D)
1stFC (Dense) (None, 128) 262272 ['global_average_pooling2d_1[0][0
]']
2ndFC (Dense) (None, 64) 8256 ['1stFC[0][0]']
lastFC (Dense) (None, 10) 650 ['2ndFC[0][0]']
==================================================================================================
Total params: 23,858,890
Trainable params: 23,805,770
Non-trainable params: 53,120
__________________________________________________________________________________________________
None
Epoch 1/1000
313/313 [==============================] - ETA: 0s - loss: 0.7429 - accuracy: 0.7528
/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument. layer_config = serialize_layer_fn(layer)
313/313 [==============================] - 168s 504ms/step - loss: 0.7429 - accuracy: 0.7528 - val_loss: 2.6878 - val_accuracy: 0.1000 Epoch 2/1000 313/313 [==============================] - 154s 491ms/step - loss: 0.4229 - accuracy: 0.8568 - val_loss: 2.6860 - val_accuracy: 0.1513 Epoch 3/1000 313/313 [==============================] - 153s 490ms/step - loss: 0.3091 - accuracy: 0.8949 - val_loss: 1.9735 - val_accuracy: 0.4169 Epoch 4/1000 313/313 [==============================] - 164s 523ms/step - loss: 0.2298 - accuracy: 0.9227 - val_loss: 0.8492 - val_accuracy: 0.7438 Epoch 5/1000 313/313 [==============================] - 153s 490ms/step - loss: 0.1755 - accuracy: 0.9413 - val_loss: 0.6516 - val_accuracy: 0.8151 Epoch 6/1000 313/313 [==============================] - 162s 518ms/step - loss: 0.1523 - accuracy: 0.9479 - val_loss: 1.2907 - val_accuracy: 0.6637 Epoch 7/1000 313/313 [==============================] - 152s 485ms/step - loss: 0.1184 - accuracy: 0.9597 - val_loss: 0.6903 - val_accuracy: 0.8183 Epoch 8/1000 313/313 [==============================] - 162s 517ms/step - loss: 0.1022 - accuracy: 0.9658 - val_loss: 0.9450 - val_accuracy: 0.7554 Epoch 9/1000 313/313 [==============================] - 152s 485ms/step - loss: 0.0915 - accuracy: 0.9697 - val_loss: 0.8400 - val_accuracy: 0.7825 Epoch 10/1000 313/313 [==============================] - 164s 523ms/step - loss: 0.0885 - accuracy: 0.9718 - val_loss: 0.6005 - val_accuracy: 0.8413 Epoch 11/1000 313/313 [==============================] - 152s 485ms/step - loss: 0.0744 - accuracy: 0.9749 - val_loss: 1.0328 - val_accuracy: 0.7759 Epoch 12/1000 313/313 [==============================] - 152s 486ms/step - loss: 0.0637 - accuracy: 0.9790 - val_loss: 0.8787 - val_accuracy: 0.7931 Epoch 13/1000 313/313 [==============================] - 152s 485ms/step - loss: 0.0622 - accuracy: 0.9790 - val_loss: 0.7085 - val_accuracy: 0.8217 Epoch 14/1000 313/313 [==============================] - 152s 486ms/step - loss: 0.0561 - accuracy: 0.9815 - val_loss: 0.7905 - val_accuracy: 0.8160 Epoch 15/1000 313/313 [==============================] - 152s 485ms/step - loss: 0.0514 - accuracy: 0.9826 - val_loss: 0.6714 - val_accuracy: 0.8373 313/313 [==============================] - 18s 55ms/step - loss: 0.6122 - accuracy: 0.8408
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.resnet50 import ResNet50
#152 중 50 layer 쓴다(?) bring trained model
#pre trained model
#ImageNet competition으로부터 옴. 1.2Million, 1000 classes 중 구분
#CNN하기 위해 ResNet, Alexnet, googlenet, vgg 등 있다.
import tensorflow as tf
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
#(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
#fashion = keras.datasets.fashion_mnist
#(train_images, train_labels), (test_images, test_labels) = fashion.load_data()
(train_images, train_labels), (test_images, test_labels) = keras.datasets.cifar10.load_data()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)
#하면 (50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)로,
#3channel(RGB)임을 알 수 있다.
"""
plt.figure(figsize=(15,15))
plt.plot(5,5)
for i in range(25) :
plt.subplot(5,5,i+1)
plt.imshow(train_images[i])
plt.xticks([])
plt.yticks([])
plt.xlabel(f'This is {train_labels[i]}')
plt.ylabel('32x32 image')
plt.title('This is cifar10 image')
plt.show()
"""
#32 x 32 이미지라서 이미지 화질 구리다.
#이미지 크지 조정 : 정확도 올라감. 이미지 확대
#이렇게 tf 쓰는거 말고도 OpenCV 등의 방식 있다.
#32by32에서 점점 작아지는 거보다
#더 크게 해놓고 3by3 으로 줄이는 게 more parameter라서 정확도 좋다.
#train_images = tf.image.resize(images=train_images, size=(75, 75)) 이건 type이 numpy므로 tensor로 바꿔주자.
train_images = tf.image.resize(images=train_images, size=(75, 75)).numpy()
test_images = tf.image.resize(images=test_images, size=(75, 75)).numpy()
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 99 )
#stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
print(train_images.shape)
print(train_images.shape[0])
#Modeling
base_model = ResNet50( include_top=False, input_shape=(75,75,3) )
#우리는 10개 output 필요한데, 1000개 제공하므로 FC(fully connected layer)(top) 필요 없다.
#weights = 'imagent'이란 파라미터는 그 대회에서 쓴 거가 default
input = base_model.input
x = base_model.output
#Flatten 대신 gap 쓰자
x = keras.layers.GlobalAveragePooling2D()(x)
#FC 추가해주자.
x = keras.layers.Dense(512, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(128, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
"""
input = keras.layers.Input(shape=(32,32,3), name='Input_layer')
x = keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', name='1stCV')(input)
x1 = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='1stMP')(x)
x = keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', name='2ndCV')(x1)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Flatten(name='Flatten')(x)
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
"""
#24Million params 있다.....ㄷㄷㄷ
model1 = keras.models.Model(inputs=input, outputs=output)
print(model1.summary())
'''
first_cv = model1.get_layer('1stCV').output
second_cv = model1.get_layer('2ndCV').output
model2 = keras.models.Model(inputs = input, outputs = second_cv)
print(model2. summary())
model3 = keras.models.Model(inputs = input, outputs = [first_cv, second_cv] ) #일케하면 multi output (이경우 2output) 임.
print(model3. summary())
input_image = test_images[0]
input_image = input_image.reshape(1,28, 28,1)
conv_outputs = model2(input_image)
#4D로 만들어줘야 한다. batch 자리도 만들어줘야 함. (1, 28, 28, 1)의 사이즈로 만들어야 함.
conv_outputs = conv_outputs.numpy()
conv_outputs = conv_outputs.reshape(14,14,64)
plt.figure(figsize=(12,12))
for i in range(64) :
plt.subplot(8,8,i+1)
plt.imshow(conv_outputs[:,:,i], cmap='gray' )
plt.xticks([])
plt.yticks([])
plt.show()
'''
model1.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
#'adam' or gradient descent method
path = 'best_model1.h5'
modelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)
earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
history = model1.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), callbacks=[modelCheckpoint, earlyStopping], epochs=1000)
model1.load_weights('best_model1.h5')
loss, acc = model1.evaluate(test_images, test_labels)
plt.plot(history.history['loss'], label='Train loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
plt.plot(history.history['accuracy'], label='Train accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170500096/170498071 [==============================] - 2s 0us/step
170508288/170498071 [==============================] - 2s 0us/step
(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)
(40000, 75, 75, 3)
40000
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
94773248/94765736 [==============================] - 1s 0us/step
94781440/94765736 [==============================] - 1s 0us/step
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 75, 75, 3)] 0 []
conv1_pad (ZeroPadding2D) (None, 81, 81, 3) 0 ['input_1[0][0]']
conv1_conv (Conv2D) (None, 38, 38, 64) 9472 ['conv1_pad[0][0]']
conv1_bn (BatchNormalization) (None, 38, 38, 64) 256 ['conv1_conv[0][0]']
conv1_relu (Activation) (None, 38, 38, 64) 0 ['conv1_bn[0][0]']
pool1_pad (ZeroPadding2D) (None, 40, 40, 64) 0 ['conv1_relu[0][0]']
pool1_pool (MaxPooling2D) (None, 19, 19, 64) 0 ['pool1_pad[0][0]']
conv2_block1_1_conv (Conv2D) (None, 19, 19, 64) 4160 ['pool1_pool[0][0]']
conv2_block1_1_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block1_1_conv[0][0]']
ization)
conv2_block1_1_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block1_1_bn[0][0]']
n)
conv2_block1_2_conv (Conv2D) (None, 19, 19, 64) 36928 ['conv2_block1_1_relu[0][0]']
conv2_block1_2_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block1_2_conv[0][0]']
ization)
conv2_block1_2_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block1_2_bn[0][0]']
n)
conv2_block1_0_conv (Conv2D) (None, 19, 19, 256) 16640 ['pool1_pool[0][0]']
conv2_block1_3_conv (Conv2D) (None, 19, 19, 256) 16640 ['conv2_block1_2_relu[0][0]']
conv2_block1_0_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block1_0_conv[0][0]']
ization)
conv2_block1_3_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block1_3_conv[0][0]']
ization)
conv2_block1_add (Add) (None, 19, 19, 256) 0 ['conv2_block1_0_bn[0][0]',
'conv2_block1_3_bn[0][0]']
conv2_block1_out (Activation) (None, 19, 19, 256) 0 ['conv2_block1_add[0][0]']
conv2_block2_1_conv (Conv2D) (None, 19, 19, 64) 16448 ['conv2_block1_out[0][0]']
conv2_block2_1_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block2_1_conv[0][0]']
ization)
conv2_block2_1_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block2_1_bn[0][0]']
n)
conv2_block2_2_conv (Conv2D) (None, 19, 19, 64) 36928 ['conv2_block2_1_relu[0][0]']
conv2_block2_2_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block2_2_conv[0][0]']
ization)
conv2_block2_2_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block2_2_bn[0][0]']
n)
conv2_block2_3_conv (Conv2D) (None, 19, 19, 256) 16640 ['conv2_block2_2_relu[0][0]']
conv2_block2_3_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block2_3_conv[0][0]']
ization)
conv2_block2_add (Add) (None, 19, 19, 256) 0 ['conv2_block1_out[0][0]',
'conv2_block2_3_bn[0][0]']
conv2_block2_out (Activation) (None, 19, 19, 256) 0 ['conv2_block2_add[0][0]']
conv2_block3_1_conv (Conv2D) (None, 19, 19, 64) 16448 ['conv2_block2_out[0][0]']
conv2_block3_1_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block3_1_conv[0][0]']
ization)
conv2_block3_1_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block3_1_bn[0][0]']
n)
conv2_block3_2_conv (Conv2D) (None, 19, 19, 64) 36928 ['conv2_block3_1_relu[0][0]']
conv2_block3_2_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block3_2_conv[0][0]']
ization)
conv2_block3_2_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block3_2_bn[0][0]']
n)
conv2_block3_3_conv (Conv2D) (None, 19, 19, 256) 16640 ['conv2_block3_2_relu[0][0]']
conv2_block3_3_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block3_3_conv[0][0]']
ization)
conv2_block3_add (Add) (None, 19, 19, 256) 0 ['conv2_block2_out[0][0]',
'conv2_block3_3_bn[0][0]']
conv2_block3_out (Activation) (None, 19, 19, 256) 0 ['conv2_block3_add[0][0]']
conv3_block1_1_conv (Conv2D) (None, 10, 10, 128) 32896 ['conv2_block3_out[0][0]']
conv3_block1_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block1_1_conv[0][0]']
ization)
conv3_block1_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block1_1_bn[0][0]']
n)
conv3_block1_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block1_1_relu[0][0]']
conv3_block1_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block1_2_conv[0][0]']
ization)
conv3_block1_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block1_2_bn[0][0]']
n)
conv3_block1_0_conv (Conv2D) (None, 10, 10, 512) 131584 ['conv2_block3_out[0][0]']
conv3_block1_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block1_2_relu[0][0]']
conv3_block1_0_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block1_0_conv[0][0]']
ization)
conv3_block1_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block1_3_conv[0][0]']
ization)
conv3_block1_add (Add) (None, 10, 10, 512) 0 ['conv3_block1_0_bn[0][0]',
'conv3_block1_3_bn[0][0]']
conv3_block1_out (Activation) (None, 10, 10, 512) 0 ['conv3_block1_add[0][0]']
conv3_block2_1_conv (Conv2D) (None, 10, 10, 128) 65664 ['conv3_block1_out[0][0]']
conv3_block2_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block2_1_conv[0][0]']
ization)
conv3_block2_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block2_1_bn[0][0]']
n)
conv3_block2_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block2_1_relu[0][0]']
conv3_block2_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block2_2_conv[0][0]']
ization)
conv3_block2_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block2_2_bn[0][0]']
n)
conv3_block2_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block2_2_relu[0][0]']
conv3_block2_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block2_3_conv[0][0]']
ization)
conv3_block2_add (Add) (None, 10, 10, 512) 0 ['conv3_block1_out[0][0]',
'conv3_block2_3_bn[0][0]']
conv3_block2_out (Activation) (None, 10, 10, 512) 0 ['conv3_block2_add[0][0]']
conv3_block3_1_conv (Conv2D) (None, 10, 10, 128) 65664 ['conv3_block2_out[0][0]']
conv3_block3_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block3_1_conv[0][0]']
ization)
conv3_block3_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block3_1_bn[0][0]']
n)
conv3_block3_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block3_1_relu[0][0]']
conv3_block3_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block3_2_conv[0][0]']
ization)
conv3_block3_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block3_2_bn[0][0]']
n)
conv3_block3_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block3_2_relu[0][0]']
conv3_block3_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block3_3_conv[0][0]']
ization)
conv3_block3_add (Add) (None, 10, 10, 512) 0 ['conv3_block2_out[0][0]',
'conv3_block3_3_bn[0][0]']
conv3_block3_out (Activation) (None, 10, 10, 512) 0 ['conv3_block3_add[0][0]']
conv3_block4_1_conv (Conv2D) (None, 10, 10, 128) 65664 ['conv3_block3_out[0][0]']
conv3_block4_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block4_1_conv[0][0]']
ization)
conv3_block4_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block4_1_bn[0][0]']
n)
conv3_block4_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block4_1_relu[0][0]']
conv3_block4_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block4_2_conv[0][0]']
ization)
conv3_block4_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block4_2_bn[0][0]']
n)
conv3_block4_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block4_2_relu[0][0]']
conv3_block4_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block4_3_conv[0][0]']
ization)
conv3_block4_add (Add) (None, 10, 10, 512) 0 ['conv3_block3_out[0][0]',
'conv3_block4_3_bn[0][0]']
conv3_block4_out (Activation) (None, 10, 10, 512) 0 ['conv3_block4_add[0][0]']
conv4_block1_1_conv (Conv2D) (None, 5, 5, 256) 131328 ['conv3_block4_out[0][0]']
conv4_block1_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block1_1_conv[0][0]']
ization)
conv4_block1_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block1_1_bn[0][0]']
n)
conv4_block1_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block1_1_relu[0][0]']
conv4_block1_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block1_2_conv[0][0]']
ization)
conv4_block1_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block1_2_bn[0][0]']
n)
conv4_block1_0_conv (Conv2D) (None, 5, 5, 1024) 525312 ['conv3_block4_out[0][0]']
conv4_block1_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block1_2_relu[0][0]']
conv4_block1_0_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block1_0_conv[0][0]']
ization)
conv4_block1_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block1_3_conv[0][0]']
ization)
conv4_block1_add (Add) (None, 5, 5, 1024) 0 ['conv4_block1_0_bn[0][0]',
'conv4_block1_3_bn[0][0]']
conv4_block1_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block1_add[0][0]']
conv4_block2_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block1_out[0][0]']
conv4_block2_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block2_1_conv[0][0]']
ization)
conv4_block2_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block2_1_bn[0][0]']
n)
conv4_block2_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block2_1_relu[0][0]']
conv4_block2_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block2_2_conv[0][0]']
ization)
conv4_block2_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block2_2_bn[0][0]']
n)
conv4_block2_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block2_2_relu[0][0]']
conv4_block2_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block2_3_conv[0][0]']
ization)
conv4_block2_add (Add) (None, 5, 5, 1024) 0 ['conv4_block1_out[0][0]',
'conv4_block2_3_bn[0][0]']
conv4_block2_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block2_add[0][0]']
conv4_block3_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block2_out[0][0]']
conv4_block3_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block3_1_conv[0][0]']
ization)
conv4_block3_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block3_1_bn[0][0]']
n)
conv4_block3_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block3_1_relu[0][0]']
conv4_block3_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block3_2_conv[0][0]']
ization)
conv4_block3_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block3_2_bn[0][0]']
n)
conv4_block3_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block3_2_relu[0][0]']
conv4_block3_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block3_3_conv[0][0]']
ization)
conv4_block3_add (Add) (None, 5, 5, 1024) 0 ['conv4_block2_out[0][0]',
'conv4_block3_3_bn[0][0]']
conv4_block3_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block3_add[0][0]']
conv4_block4_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block3_out[0][0]']
conv4_block4_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block4_1_conv[0][0]']
ization)
conv4_block4_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block4_1_bn[0][0]']
n)
conv4_block4_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block4_1_relu[0][0]']
conv4_block4_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block4_2_conv[0][0]']
ization)
conv4_block4_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block4_2_bn[0][0]']
n)
conv4_block4_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block4_2_relu[0][0]']
conv4_block4_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block4_3_conv[0][0]']
ization)
conv4_block4_add (Add) (None, 5, 5, 1024) 0 ['conv4_block3_out[0][0]',
'conv4_block4_3_bn[0][0]']
conv4_block4_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block4_add[0][0]']
conv4_block5_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block4_out[0][0]']
conv4_block5_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block5_1_conv[0][0]']
ization)
conv4_block5_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block5_1_bn[0][0]']
n)
conv4_block5_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block5_1_relu[0][0]']
conv4_block5_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block5_2_conv[0][0]']
ization)
conv4_block5_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block5_2_bn[0][0]']
n)
conv4_block5_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block5_2_relu[0][0]']
conv4_block5_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block5_3_conv[0][0]']
ization)
conv4_block5_add (Add) (None, 5, 5, 1024) 0 ['conv4_block4_out[0][0]',
'conv4_block5_3_bn[0][0]']
conv4_block5_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block5_add[0][0]']
conv4_block6_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block5_out[0][0]']
conv4_block6_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block6_1_conv[0][0]']
ization)
conv4_block6_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block6_1_bn[0][0]']
n)
conv4_block6_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block6_1_relu[0][0]']
conv4_block6_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block6_2_conv[0][0]']
ization)
conv4_block6_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block6_2_bn[0][0]']
n)
conv4_block6_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block6_2_relu[0][0]']
conv4_block6_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block6_3_conv[0][0]']
ization)
conv4_block6_add (Add) (None, 5, 5, 1024) 0 ['conv4_block5_out[0][0]',
'conv4_block6_3_bn[0][0]']
conv4_block6_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block6_add[0][0]']
conv5_block1_1_conv (Conv2D) (None, 3, 3, 512) 524800 ['conv4_block6_out[0][0]']
conv5_block1_1_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block1_1_conv[0][0]']
ization)
conv5_block1_1_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block1_1_bn[0][0]']
n)
conv5_block1_2_conv (Conv2D) (None, 3, 3, 512) 2359808 ['conv5_block1_1_relu[0][0]']
conv5_block1_2_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block1_2_conv[0][0]']
ization)
conv5_block1_2_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block1_2_bn[0][0]']
n)
conv5_block1_0_conv (Conv2D) (None, 3, 3, 2048) 2099200 ['conv4_block6_out[0][0]']
conv5_block1_3_conv (Conv2D) (None, 3, 3, 2048) 1050624 ['conv5_block1_2_relu[0][0]']
conv5_block1_0_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block1_0_conv[0][0]']
ization)
conv5_block1_3_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block1_3_conv[0][0]']
ization)
conv5_block1_add (Add) (None, 3, 3, 2048) 0 ['conv5_block1_0_bn[0][0]',
'conv5_block1_3_bn[0][0]']
conv5_block1_out (Activation) (None, 3, 3, 2048) 0 ['conv5_block1_add[0][0]']
conv5_block2_1_conv (Conv2D) (None, 3, 3, 512) 1049088 ['conv5_block1_out[0][0]']
conv5_block2_1_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block2_1_conv[0][0]']
ization)
conv5_block2_1_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block2_1_bn[0][0]']
n)
conv5_block2_2_conv (Conv2D) (None, 3, 3, 512) 2359808 ['conv5_block2_1_relu[0][0]']
conv5_block2_2_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block2_2_conv[0][0]']
ization)
conv5_block2_2_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block2_2_bn[0][0]']
n)
conv5_block2_3_conv (Conv2D) (None, 3, 3, 2048) 1050624 ['conv5_block2_2_relu[0][0]']
conv5_block2_3_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block2_3_conv[0][0]']
ization)
conv5_block2_add (Add) (None, 3, 3, 2048) 0 ['conv5_block1_out[0][0]',
'conv5_block2_3_bn[0][0]']
conv5_block2_out (Activation) (None, 3, 3, 2048) 0 ['conv5_block2_add[0][0]']
conv5_block3_1_conv (Conv2D) (None, 3, 3, 512) 1049088 ['conv5_block2_out[0][0]']
conv5_block3_1_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block3_1_conv[0][0]']
ization)
conv5_block3_1_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block3_1_bn[0][0]']
n)
conv5_block3_2_conv (Conv2D) (None, 3, 3, 512) 2359808 ['conv5_block3_1_relu[0][0]']
conv5_block3_2_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block3_2_conv[0][0]']
ization)
conv5_block3_2_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block3_2_bn[0][0]']
n)
conv5_block3_3_conv (Conv2D) (None, 3, 3, 2048) 1050624 ['conv5_block3_2_relu[0][0]']
conv5_block3_3_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block3_3_conv[0][0]']
ization)
conv5_block3_add (Add) (None, 3, 3, 2048) 0 ['conv5_block2_out[0][0]',
'conv5_block3_3_bn[0][0]']
conv5_block3_out (Activation) (None, 3, 3, 2048) 0 ['conv5_block3_add[0][0]']
global_average_pooling2d (Glob (None, 2048) 0 ['conv5_block3_out[0][0]']
alAveragePooling2D)
1stFC (Dense) (None, 512) 1049088 ['global_average_pooling2d[0][0]'
]
2ndFC (Dense) (None, 128) 65664 ['1stFC[0][0]']
lastFC (Dense) (None, 10) 1290 ['2ndFC[0][0]']
==================================================================================================
Total params: 24,703,754
Trainable params: 24,650,634
Non-trainable params: 53,120
__________________________________________________________________________________________________
None
Epoch 1/1000
313/313 [==============================] - ETA: 0s - loss: 0.7371 - accuracy: 0.7548
/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument. layer_config = serialize_layer_fn(layer)
313/313 [==============================] - 220s 643ms/step - loss: 0.7371 - accuracy: 0.7548 - val_loss: 3.0292 - val_accuracy: 0.1000 Epoch 2/1000 313/313 [==============================] - 196s 627ms/step - loss: 0.4155 - accuracy: 0.8586 - val_loss: 2.7108 - val_accuracy: 0.1547 Epoch 3/1000 313/313 [==============================] - 196s 626ms/step - loss: 0.3075 - accuracy: 0.8965 - val_loss: 1.4528 - val_accuracy: 0.5558 Epoch 4/1000 313/313 [==============================] - 203s 649ms/step - loss: 0.2359 - accuracy: 0.9204 - val_loss: 0.7114 - val_accuracy: 0.7829 Epoch 5/1000 313/313 [==============================] - 201s 644ms/step - loss: 0.1791 - accuracy: 0.9383 - val_loss: 0.9262 - val_accuracy: 0.7605 Epoch 6/1000 313/313 [==============================] - 194s 621ms/step - loss: 0.1464 - accuracy: 0.9502 - val_loss: 0.9207 - val_accuracy: 0.7603 Epoch 7/1000 313/313 [==============================] - 196s 626ms/step - loss: 0.1156 - accuracy: 0.9610 - val_loss: 0.6410 - val_accuracy: 0.8341 Epoch 8/1000 313/313 [==============================] - 194s 620ms/step - loss: 0.1017 - accuracy: 0.9656 - val_loss: 1.3821 - val_accuracy: 0.6515 Epoch 9/1000 313/313 [==============================] - 201s 644ms/step - loss: 0.0897 - accuracy: 0.9701 - val_loss: 0.6439 - val_accuracy: 0.8322 Epoch 10/1000 313/313 [==============================] - 194s 621ms/step - loss: 0.0845 - accuracy: 0.9719 - val_loss: 1.6082 - val_accuracy: 0.5876 Epoch 11/1000 313/313 [==============================] - 194s 620ms/step - loss: 0.0738 - accuracy: 0.9756 - val_loss: 1.1481 - val_accuracy: 0.7441 Epoch 12/1000 313/313 [==============================] - 194s 620ms/step - loss: 0.0680 - accuracy: 0.9777 - val_loss: 1.0796 - val_accuracy: 0.7592 313/313 [==============================] - 22s 70ms/step - loss: 0.6659 - accuracy: 0.8285
이런 걸 transfer learning이라고 한다.
1.2Million Image form imageNet,
그리고 이게 classify 1000 classes.
50000 images from CIFAR10 ->classify 10개의 target(classes)으로.
Approaches
1) train all parameters
(우리가 한 방법)
2) partially train, 즉 pretrained는 frozen하고, train only FC
근데 가끔은 빨간색 할 필요 없고 New FC꺼만 train할 때도 있음.
즉, 175개의 layer 있다.
true니까 update weight and bias 할 수 있다.
이렇게 바꾸면 0번 layer의 weight, bias를 바꿀 수 없게 한다.
모든 parameter를 못바꾸게 하면,...
trainable params는 can be updated : FC이다.
non-trainable params :는 can not be updated : res50꺼이다.
3) pretrained은 frozen하고, for only 1 or 2 epochs는 train all
완전 똑같은 이미진데, 이렇게 조금씩 변형 일어나면 다른 이미지로 인식함.
그래서 데이터 부족할 때, 이렇게 변형해가지구 트레인 하면 됨.
model을 generalize하기 위해, increase the number of image, by augmentation.
#import Library
from tensorflow import keras
import matplotlib.pyplot as plt # from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.resnet50 import ResNet50
#152 중 50 layer 쓴다(?) bring trained model
#pre trained model
#ImageNet competition으로부터 옴. 1.2Million, 1000 classes 중 구분
#CNN하기 위해 ResNet, Alexnet, googlenet, vgg 등 있다.
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator #Augmentation
# keras.datasets. #하면 7가지 예제 있다. boston_housing 등
#(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
#fashion = keras.datasets.fashion_mnist
#(train_images, train_labels), (test_images, test_labels) = fashion.load_data()
(train_images, train_labels), (test_images, test_labels) = keras.datasets.cifar10.load_data()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)
#하면 (50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)로,
#3channel(RGB)임을 알 수 있다.
"""
plt.figure(figsize=(15,15))
plt.plot(5,5)
for i in range(25) :
plt.subplot(5,5,i+1)
plt.imshow(train_images[i])
plt.xticks([])
plt.yticks([])
plt.xlabel(f'This is {train_labels[i]}')
plt.ylabel('32x32 image')
plt.title('This is cifar10 image')
plt.show()
"""
#32 x 32 이미지라서 이미지 화질 구리다.
#이미지 크지 조정 : 정확도 올라감. 이미지 확대
#이렇게 tf 쓰는거 말고도 OpenCV 등의 방식 있다.
#32by32에서 점점 작아지는 거보다
#더 크게 해놓고 3by3 으로 줄이는 게 more parameter라서 정확도 좋다.
#train_images = tf.image.resize(images=train_images, size=(75, 75)) 이건 type이 numpy므로 tensor로 바꿔주자.
train_images = tf.image.resize(images=train_images, size=(75, 75)).numpy()
test_images = tf.image.resize(images=test_images, size=(75, 75)).numpy()
#Data Normalization : 0~255 scale to 0~1
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, stratify = train_labels , random_state = 99 )
#stratify 는 나눠지는 (라벨 비율이 같게!) 거의 밸런스 맞춰지게 나눠짐.
print(train_images.shape)
print(train_images.shape[0])
#Modeling
base_model = ResNet50( include_top=False, input_shape=(75,75,3) )
#우리는 10개 output 필요한데, 1000개 제공하므로 FC(fully connected layer)(top) 필요 없다.
#weights = 'imagent'이란 파라미터는 그 대회에서 쓴 거가 default
input = base_model.input
x = base_model.output
#Flatten 대신 gap 쓰자
x = keras.layers.GlobalAveragePooling2D()(x)
#FC 추가해주자.
x = keras.layers.Dense(512, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(128, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
"""
input = keras.layers.Input(shape=(32,32,3), name='Input_layer')
x = keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', name='1stCV')(input)
x1 = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='1stMP')(x)
x = keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', name='2ndCV')(x1)
x = keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2), name='2ndMP')(x)
x = keras.layers.Flatten(name='Flatten')(x)
x = keras.layers.Dense(128, activation='relu', name='1stFC')(x)
x = keras.layers.Dense(64, activation='relu', name='2ndFC')(x)
output = keras.layers.Dense(10, activation='softmax', name='lastFC')(x)
"""
#24Million params 있다.....ㄷㄷㄷ
model1 = keras.models.Model(inputs=input, outputs=output)
print(model1.summary())
'''
first_cv = model1.get_layer('1stCV').output
second_cv = model1.get_layer('2ndCV').output
model2 = keras.models.Model(inputs = input, outputs = second_cv)
print(model2. summary())
model3 = keras.models.Model(inputs = input, outputs = [first_cv, second_cv] ) #일케하면 multi output (이경우 2output) 임.
print(model3. summary())
input_image = test_images[0]
input_image = input_image.reshape(1,28, 28,1)
conv_outputs = model2(input_image)
#4D로 만들어줘야 한다. batch 자리도 만들어줘야 함. (1, 28, 28, 1)의 사이즈로 만들어야 함.
conv_outputs = conv_outputs.numpy()
conv_outputs = conv_outputs.reshape(14,14,64)
plt.figure(figsize=(12,12))
for i in range(64) :
plt.subplot(8,8,i+1)
plt.imshow(conv_outputs[:,:,i], cmap='gray' )
plt.xticks([])
plt.yticks([])
plt.show()
'''
model1.compile(loss = 'SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])
#'adam' or gradient descent method
path = 'best_model1.h5'
modelCheckpoint = keras.callbacks.ModelCheckpoint(filepath=path, monitor='val_loss', save_best_only = True)
earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
#image augmentation
image_generator = ImageDataGenerator(rotation_range=10, width_shift_range=0.1, height_shift_range=0.1, zoom_range=0.05, horizontal_flip=True)
#-10degrees ~ 10degrees로 랜덤 넘버 selected된다.
#뒤에꺼 단위 퍼센트임.
#default는 다 False임.
history = model1.fit_generator(generator = image_generator.flow(train_images, train_labels, batch_size=128), steps_per_epoch=train_images.shape[0]//128, validation_data=(val_images, val_labels), epochs=1000, callbacks=[modelCheckpoint, earlyStopping] )
'''
model.fit(generator = image_generator.flow(train_images, train_lables, batch_size=128), steps_per_epoch=train_images.shape[0]//128, validation_data=(val_images, val_labels], epochs=1000, callbacks=[modelCheckpoint, earlyStopping] )
'''
"""
history = model1.fit(train_images, train_labels, batch_size = 128, validation_data=(val_images, val_labels), callbacks=[modelCheckpoint, earlyStopping], epochs=1000)
"""
model1.load_weights('best_model1.h5')
loss, acc = model1.evaluate(test_images, test_labels)
plt.plot(history.history['loss'], label='Train loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
plt.plot(history.history['accuracy'], label='Train accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend()
plt.show()
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170500096/170498071 [==============================] - 6s 0us/step
170508288/170498071 [==============================] - 6s 0us/step
(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)
(40000, 75, 75, 3)
40000
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
94773248/94765736 [==============================] - 2s 0us/step
94781440/94765736 [==============================] - 2s 0us/step
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 75, 75, 3)] 0 []
conv1_pad (ZeroPadding2D) (None, 81, 81, 3) 0 ['input_1[0][0]']
conv1_conv (Conv2D) (None, 38, 38, 64) 9472 ['conv1_pad[0][0]']
conv1_bn (BatchNormalization) (None, 38, 38, 64) 256 ['conv1_conv[0][0]']
conv1_relu (Activation) (None, 38, 38, 64) 0 ['conv1_bn[0][0]']
pool1_pad (ZeroPadding2D) (None, 40, 40, 64) 0 ['conv1_relu[0][0]']
pool1_pool (MaxPooling2D) (None, 19, 19, 64) 0 ['pool1_pad[0][0]']
conv2_block1_1_conv (Conv2D) (None, 19, 19, 64) 4160 ['pool1_pool[0][0]']
conv2_block1_1_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block1_1_conv[0][0]']
ization)
conv2_block1_1_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block1_1_bn[0][0]']
n)
conv2_block1_2_conv (Conv2D) (None, 19, 19, 64) 36928 ['conv2_block1_1_relu[0][0]']
conv2_block1_2_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block1_2_conv[0][0]']
ization)
conv2_block1_2_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block1_2_bn[0][0]']
n)
conv2_block1_0_conv (Conv2D) (None, 19, 19, 256) 16640 ['pool1_pool[0][0]']
conv2_block1_3_conv (Conv2D) (None, 19, 19, 256) 16640 ['conv2_block1_2_relu[0][0]']
conv2_block1_0_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block1_0_conv[0][0]']
ization)
conv2_block1_3_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block1_3_conv[0][0]']
ization)
conv2_block1_add (Add) (None, 19, 19, 256) 0 ['conv2_block1_0_bn[0][0]',
'conv2_block1_3_bn[0][0]']
conv2_block1_out (Activation) (None, 19, 19, 256) 0 ['conv2_block1_add[0][0]']
conv2_block2_1_conv (Conv2D) (None, 19, 19, 64) 16448 ['conv2_block1_out[0][0]']
conv2_block2_1_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block2_1_conv[0][0]']
ization)
conv2_block2_1_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block2_1_bn[0][0]']
n)
conv2_block2_2_conv (Conv2D) (None, 19, 19, 64) 36928 ['conv2_block2_1_relu[0][0]']
conv2_block2_2_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block2_2_conv[0][0]']
ization)
conv2_block2_2_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block2_2_bn[0][0]']
n)
conv2_block2_3_conv (Conv2D) (None, 19, 19, 256) 16640 ['conv2_block2_2_relu[0][0]']
conv2_block2_3_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block2_3_conv[0][0]']
ization)
conv2_block2_add (Add) (None, 19, 19, 256) 0 ['conv2_block1_out[0][0]',
'conv2_block2_3_bn[0][0]']
conv2_block2_out (Activation) (None, 19, 19, 256) 0 ['conv2_block2_add[0][0]']
conv2_block3_1_conv (Conv2D) (None, 19, 19, 64) 16448 ['conv2_block2_out[0][0]']
conv2_block3_1_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block3_1_conv[0][0]']
ization)
conv2_block3_1_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block3_1_bn[0][0]']
n)
conv2_block3_2_conv (Conv2D) (None, 19, 19, 64) 36928 ['conv2_block3_1_relu[0][0]']
conv2_block3_2_bn (BatchNormal (None, 19, 19, 64) 256 ['conv2_block3_2_conv[0][0]']
ization)
conv2_block3_2_relu (Activatio (None, 19, 19, 64) 0 ['conv2_block3_2_bn[0][0]']
n)
conv2_block3_3_conv (Conv2D) (None, 19, 19, 256) 16640 ['conv2_block3_2_relu[0][0]']
conv2_block3_3_bn (BatchNormal (None, 19, 19, 256) 1024 ['conv2_block3_3_conv[0][0]']
ization)
conv2_block3_add (Add) (None, 19, 19, 256) 0 ['conv2_block2_out[0][0]',
'conv2_block3_3_bn[0][0]']
conv2_block3_out (Activation) (None, 19, 19, 256) 0 ['conv2_block3_add[0][0]']
conv3_block1_1_conv (Conv2D) (None, 10, 10, 128) 32896 ['conv2_block3_out[0][0]']
conv3_block1_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block1_1_conv[0][0]']
ization)
conv3_block1_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block1_1_bn[0][0]']
n)
conv3_block1_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block1_1_relu[0][0]']
conv3_block1_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block1_2_conv[0][0]']
ization)
conv3_block1_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block1_2_bn[0][0]']
n)
conv3_block1_0_conv (Conv2D) (None, 10, 10, 512) 131584 ['conv2_block3_out[0][0]']
conv3_block1_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block1_2_relu[0][0]']
conv3_block1_0_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block1_0_conv[0][0]']
ization)
conv3_block1_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block1_3_conv[0][0]']
ization)
conv3_block1_add (Add) (None, 10, 10, 512) 0 ['conv3_block1_0_bn[0][0]',
'conv3_block1_3_bn[0][0]']
conv3_block1_out (Activation) (None, 10, 10, 512) 0 ['conv3_block1_add[0][0]']
conv3_block2_1_conv (Conv2D) (None, 10, 10, 128) 65664 ['conv3_block1_out[0][0]']
conv3_block2_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block2_1_conv[0][0]']
ization)
conv3_block2_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block2_1_bn[0][0]']
n)
conv3_block2_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block2_1_relu[0][0]']
conv3_block2_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block2_2_conv[0][0]']
ization)
conv3_block2_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block2_2_bn[0][0]']
n)
conv3_block2_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block2_2_relu[0][0]']
conv3_block2_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block2_3_conv[0][0]']
ization)
conv3_block2_add (Add) (None, 10, 10, 512) 0 ['conv3_block1_out[0][0]',
'conv3_block2_3_bn[0][0]']
conv3_block2_out (Activation) (None, 10, 10, 512) 0 ['conv3_block2_add[0][0]']
conv3_block3_1_conv (Conv2D) (None, 10, 10, 128) 65664 ['conv3_block2_out[0][0]']
conv3_block3_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block3_1_conv[0][0]']
ization)
conv3_block3_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block3_1_bn[0][0]']
n)
conv3_block3_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block3_1_relu[0][0]']
conv3_block3_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block3_2_conv[0][0]']
ization)
conv3_block3_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block3_2_bn[0][0]']
n)
conv3_block3_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block3_2_relu[0][0]']
conv3_block3_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block3_3_conv[0][0]']
ization)
conv3_block3_add (Add) (None, 10, 10, 512) 0 ['conv3_block2_out[0][0]',
'conv3_block3_3_bn[0][0]']
conv3_block3_out (Activation) (None, 10, 10, 512) 0 ['conv3_block3_add[0][0]']
conv3_block4_1_conv (Conv2D) (None, 10, 10, 128) 65664 ['conv3_block3_out[0][0]']
conv3_block4_1_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block4_1_conv[0][0]']
ization)
conv3_block4_1_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block4_1_bn[0][0]']
n)
conv3_block4_2_conv (Conv2D) (None, 10, 10, 128) 147584 ['conv3_block4_1_relu[0][0]']
conv3_block4_2_bn (BatchNormal (None, 10, 10, 128) 512 ['conv3_block4_2_conv[0][0]']
ization)
conv3_block4_2_relu (Activatio (None, 10, 10, 128) 0 ['conv3_block4_2_bn[0][0]']
n)
conv3_block4_3_conv (Conv2D) (None, 10, 10, 512) 66048 ['conv3_block4_2_relu[0][0]']
conv3_block4_3_bn (BatchNormal (None, 10, 10, 512) 2048 ['conv3_block4_3_conv[0][0]']
ization)
conv3_block4_add (Add) (None, 10, 10, 512) 0 ['conv3_block3_out[0][0]',
'conv3_block4_3_bn[0][0]']
conv3_block4_out (Activation) (None, 10, 10, 512) 0 ['conv3_block4_add[0][0]']
conv4_block1_1_conv (Conv2D) (None, 5, 5, 256) 131328 ['conv3_block4_out[0][0]']
conv4_block1_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block1_1_conv[0][0]']
ization)
conv4_block1_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block1_1_bn[0][0]']
n)
conv4_block1_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block1_1_relu[0][0]']
conv4_block1_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block1_2_conv[0][0]']
ization)
conv4_block1_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block1_2_bn[0][0]']
n)
conv4_block1_0_conv (Conv2D) (None, 5, 5, 1024) 525312 ['conv3_block4_out[0][0]']
conv4_block1_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block1_2_relu[0][0]']
conv4_block1_0_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block1_0_conv[0][0]']
ization)
conv4_block1_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block1_3_conv[0][0]']
ization)
conv4_block1_add (Add) (None, 5, 5, 1024) 0 ['conv4_block1_0_bn[0][0]',
'conv4_block1_3_bn[0][0]']
conv4_block1_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block1_add[0][0]']
conv4_block2_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block1_out[0][0]']
conv4_block2_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block2_1_conv[0][0]']
ization)
conv4_block2_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block2_1_bn[0][0]']
n)
conv4_block2_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block2_1_relu[0][0]']
conv4_block2_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block2_2_conv[0][0]']
ization)
conv4_block2_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block2_2_bn[0][0]']
n)
conv4_block2_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block2_2_relu[0][0]']
conv4_block2_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block2_3_conv[0][0]']
ization)
conv4_block2_add (Add) (None, 5, 5, 1024) 0 ['conv4_block1_out[0][0]',
'conv4_block2_3_bn[0][0]']
conv4_block2_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block2_add[0][0]']
conv4_block3_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block2_out[0][0]']
conv4_block3_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block3_1_conv[0][0]']
ization)
conv4_block3_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block3_1_bn[0][0]']
n)
conv4_block3_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block3_1_relu[0][0]']
conv4_block3_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block3_2_conv[0][0]']
ization)
conv4_block3_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block3_2_bn[0][0]']
n)
conv4_block3_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block3_2_relu[0][0]']
conv4_block3_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block3_3_conv[0][0]']
ization)
conv4_block3_add (Add) (None, 5, 5, 1024) 0 ['conv4_block2_out[0][0]',
'conv4_block3_3_bn[0][0]']
conv4_block3_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block3_add[0][0]']
conv4_block4_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block3_out[0][0]']
conv4_block4_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block4_1_conv[0][0]']
ization)
conv4_block4_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block4_1_bn[0][0]']
n)
conv4_block4_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block4_1_relu[0][0]']
conv4_block4_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block4_2_conv[0][0]']
ization)
conv4_block4_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block4_2_bn[0][0]']
n)
conv4_block4_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block4_2_relu[0][0]']
conv4_block4_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block4_3_conv[0][0]']
ization)
conv4_block4_add (Add) (None, 5, 5, 1024) 0 ['conv4_block3_out[0][0]',
'conv4_block4_3_bn[0][0]']
conv4_block4_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block4_add[0][0]']
conv4_block5_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block4_out[0][0]']
conv4_block5_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block5_1_conv[0][0]']
ization)
conv4_block5_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block5_1_bn[0][0]']
n)
conv4_block5_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block5_1_relu[0][0]']
conv4_block5_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block5_2_conv[0][0]']
ization)
conv4_block5_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block5_2_bn[0][0]']
n)
conv4_block5_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block5_2_relu[0][0]']
conv4_block5_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block5_3_conv[0][0]']
ization)
conv4_block5_add (Add) (None, 5, 5, 1024) 0 ['conv4_block4_out[0][0]',
'conv4_block5_3_bn[0][0]']
conv4_block5_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block5_add[0][0]']
conv4_block6_1_conv (Conv2D) (None, 5, 5, 256) 262400 ['conv4_block5_out[0][0]']
conv4_block6_1_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block6_1_conv[0][0]']
ization)
conv4_block6_1_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block6_1_bn[0][0]']
n)
conv4_block6_2_conv (Conv2D) (None, 5, 5, 256) 590080 ['conv4_block6_1_relu[0][0]']
conv4_block6_2_bn (BatchNormal (None, 5, 5, 256) 1024 ['conv4_block6_2_conv[0][0]']
ization)
conv4_block6_2_relu (Activatio (None, 5, 5, 256) 0 ['conv4_block6_2_bn[0][0]']
n)
conv4_block6_3_conv (Conv2D) (None, 5, 5, 1024) 263168 ['conv4_block6_2_relu[0][0]']
conv4_block6_3_bn (BatchNormal (None, 5, 5, 1024) 4096 ['conv4_block6_3_conv[0][0]']
ization)
conv4_block6_add (Add) (None, 5, 5, 1024) 0 ['conv4_block5_out[0][0]',
'conv4_block6_3_bn[0][0]']
conv4_block6_out (Activation) (None, 5, 5, 1024) 0 ['conv4_block6_add[0][0]']
conv5_block1_1_conv (Conv2D) (None, 3, 3, 512) 524800 ['conv4_block6_out[0][0]']
conv5_block1_1_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block1_1_conv[0][0]']
ization)
conv5_block1_1_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block1_1_bn[0][0]']
n)
conv5_block1_2_conv (Conv2D) (None, 3, 3, 512) 2359808 ['conv5_block1_1_relu[0][0]']
conv5_block1_2_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block1_2_conv[0][0]']
ization)
conv5_block1_2_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block1_2_bn[0][0]']
n)
conv5_block1_0_conv (Conv2D) (None, 3, 3, 2048) 2099200 ['conv4_block6_out[0][0]']
conv5_block1_3_conv (Conv2D) (None, 3, 3, 2048) 1050624 ['conv5_block1_2_relu[0][0]']
conv5_block1_0_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block1_0_conv[0][0]']
ization)
conv5_block1_3_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block1_3_conv[0][0]']
ization)
conv5_block1_add (Add) (None, 3, 3, 2048) 0 ['conv5_block1_0_bn[0][0]',
'conv5_block1_3_bn[0][0]']
conv5_block1_out (Activation) (None, 3, 3, 2048) 0 ['conv5_block1_add[0][0]']
conv5_block2_1_conv (Conv2D) (None, 3, 3, 512) 1049088 ['conv5_block1_out[0][0]']
conv5_block2_1_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block2_1_conv[0][0]']
ization)
conv5_block2_1_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block2_1_bn[0][0]']
n)
conv5_block2_2_conv (Conv2D) (None, 3, 3, 512) 2359808 ['conv5_block2_1_relu[0][0]']
conv5_block2_2_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block2_2_conv[0][0]']
ization)
conv5_block2_2_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block2_2_bn[0][0]']
n)
conv5_block2_3_conv (Conv2D) (None, 3, 3, 2048) 1050624 ['conv5_block2_2_relu[0][0]']
conv5_block2_3_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block2_3_conv[0][0]']
ization)
conv5_block2_add (Add) (None, 3, 3, 2048) 0 ['conv5_block1_out[0][0]',
'conv5_block2_3_bn[0][0]']
conv5_block2_out (Activation) (None, 3, 3, 2048) 0 ['conv5_block2_add[0][0]']
conv5_block3_1_conv (Conv2D) (None, 3, 3, 512) 1049088 ['conv5_block2_out[0][0]']
conv5_block3_1_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block3_1_conv[0][0]']
ization)
conv5_block3_1_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block3_1_bn[0][0]']
n)
conv5_block3_2_conv (Conv2D) (None, 3, 3, 512) 2359808 ['conv5_block3_1_relu[0][0]']
conv5_block3_2_bn (BatchNormal (None, 3, 3, 512) 2048 ['conv5_block3_2_conv[0][0]']
ization)
conv5_block3_2_relu (Activatio (None, 3, 3, 512) 0 ['conv5_block3_2_bn[0][0]']
n)
conv5_block3_3_conv (Conv2D) (None, 3, 3, 2048) 1050624 ['conv5_block3_2_relu[0][0]']
conv5_block3_3_bn (BatchNormal (None, 3, 3, 2048) 8192 ['conv5_block3_3_conv[0][0]']
ization)
conv5_block3_add (Add) (None, 3, 3, 2048) 0 ['conv5_block2_out[0][0]',
'conv5_block3_3_bn[0][0]']
conv5_block3_out (Activation) (None, 3, 3, 2048) 0 ['conv5_block3_add[0][0]']
global_average_pooling2d (Glob (None, 2048) 0 ['conv5_block3_out[0][0]']
alAveragePooling2D)
1stFC (Dense) (None, 512) 1049088 ['global_average_pooling2d[0][0]'
]
2ndFC (Dense) (None, 128) 65664 ['1stFC[0][0]']
lastFC (Dense) (None, 10) 1290 ['2ndFC[0][0]']
==================================================================================================
Total params: 24,703,754
Trainable params: 24,650,634
Non-trainable params: 53,120
__________________________________________________________________________________________________
None
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:133: UserWarning: `Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators.
Epoch 1/1000 312/312 [==============================] - ETA: 0s - loss: 0.8308 - accuracy: 0.0998
/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument. layer_config = serialize_layer_fn(layer)
312/312 [==============================] - 178s 512ms/step - loss: 0.8308 - accuracy: 0.0998 - val_loss: 3.5028 - val_accuracy: 0.0000e+00 Epoch 2/1000 312/312 [==============================] - 156s 500ms/step - loss: 0.5342 - accuracy: 0.1011 - val_loss: 2.9883 - val_accuracy: 0.5008 Epoch 3/1000 312/312 [==============================] - 156s 500ms/step - loss: 0.4585 - accuracy: 0.1018 - val_loss: 1.6017 - val_accuracy: 0.0622 Epoch 4/1000 312/312 [==============================] - 157s 501ms/step - loss: 0.4018 - accuracy: 0.1013 - val_loss: 0.5562 - val_accuracy: 0.1259 Epoch 5/1000 312/312 [==============================] - 155s 496ms/step - loss: 0.3592 - accuracy: 0.1016 - val_loss: 0.7849 - val_accuracy: 0.1525 Epoch 6/1000 312/312 [==============================] - 161s 516ms/step - loss: 0.3360 - accuracy: 0.1018 - val_loss: 1.1961 - val_accuracy: 0.2551 Epoch 7/1000 312/312 [==============================] - 155s 495ms/step - loss: 0.3037 - accuracy: 0.1008 - val_loss: 0.9513 - val_accuracy: 0.1201 Epoch 8/1000 312/312 [==============================] - 155s 495ms/step - loss: 0.2864 - accuracy: 0.1012 - val_loss: 0.9103 - val_accuracy: 0.0667 Epoch 9/1000 312/312 [==============================] - 156s 500ms/step - loss: 0.2663 - accuracy: 0.1016 - val_loss: 0.4991 - val_accuracy: 0.1191 Epoch 10/1000 312/312 [==============================] - 155s 496ms/step - loss: 0.2489 - accuracy: 0.1011 - val_loss: 0.6003 - val_accuracy: 0.1113 Epoch 11/1000 312/312 [==============================] - 154s 494ms/step - loss: 0.2379 - accuracy: 0.1008 - val_loss: 1.1253 - val_accuracy: 0.2452 Epoch 12/1000 312/312 [==============================] - 156s 500ms/step - loss: 0.2220 - accuracy: 0.1004 - val_loss: 0.4610 - val_accuracy: 0.1313 Epoch 13/1000 312/312 [==============================] - 155s 496ms/step - loss: 0.2100 - accuracy: 0.1009 - val_loss: 0.6780 - val_accuracy: 0.0886 Epoch 14/1000 312/312 [==============================] - 155s 496ms/step - loss: 0.2057 - accuracy: 0.1010 - val_loss: 4.0761 - val_accuracy: 0.0863 Epoch 15/1000 312/312 [==============================] - 154s 494ms/step - loss: 0.1918 - accuracy: 0.1003 - val_loss: 0.6654 - val_accuracy: 0.1416 Epoch 16/1000 312/312 [==============================] - 155s 496ms/step - loss: 0.1766 - accuracy: 0.0999 - val_loss: 0.5112 - val_accuracy: 0.1096 Epoch 17/1000 312/312 [==============================] - 155s 495ms/step - loss: 0.1712 - accuracy: 0.1004 - val_loss: 0.8940 - val_accuracy: 0.0993 313/313 [==============================] - 16s 51ms/step - loss: 0.4729 - accuracy: 0.1388
model.add(keras.layers.Dense(3,activation = 'relu', input_shape=(2,)))
model.add(keras.layers.Dense(1, activation='sigmoid'))
input = keras.layers.Input(shape=(2,), name='Input_layer')
x = keras.layers.Dense(3, activation='relu', name='1stFC')(input)
output = keras.layers.Dense(1, activation='sigmoid', name='lastFC')(x)
model = keras.models.Model(inputs=input, outputs=output)
weights : 9개
bias : 4개
total parameters : 13개
(None, 24, 24, 32)
(None, 12, 12, 32)
(None, 12, 12, 64)
(None, 6, 6, 64)
참고로 Flatten의 2304는 6 x 6 x 64를 의미
(5,5)
for i in range(100,125) :
(5, 5, i - 100 + 1)
(train_images[i])
6번
ResNet50( include_top=False, input_shape=(32,32,3) )
base_model.output
inputs=base_model.input, outputs=output
7번
for layer in base_model.layers:
layer.trainable = False
8번
for i in range(50) :
base_model.layers[i].trainable = False
for (i,layer) in enumerate(base_model.layers) :
if i < 50 :
layer.trainable = False
train_test_split(data, label, test_size = 0.2, stratify = label, random_state=1)
train_test_split(x, y, test_size = 0.2, stratify = y, random_state=1)
The number is 1 0
The number is 3 0
for i in range(1,5,2) :
for j in range(5) :
if j%2 == 1:
break
print("The number is ", i, j)
The number is 1 0 The number is 3 0
The number is 1 0
The number is 1 2
The number is 1 4
The number is 3 0
The number is 3 2
The number is 3 4
for i in range(1,5,2) :
for j in range(5) :
if j%2 == 1:
continue
print("The number is ", i, j)
The number is 1 0 The number is 1 2 The number is 1 4 The number is 3 0 The number is 3 2 The number is 3 4